diff --git a/openai_whisper-large-v3-v20240930_626MB/AudioEncoder.mlmodelc/analytics/coremldata.bin b/openai_whisper-large-v3-v20240930_626MB/AudioEncoder.mlmodelc/analytics/coremldata.bin
new file mode 100644
index 0000000000000000000000000000000000000000..65a4a06280b655f5811b3961638b1acb2c360237
--- /dev/null
+++ b/openai_whisper-large-v3-v20240930_626MB/AudioEncoder.mlmodelc/analytics/coremldata.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5804542da569c631a754ccc129c66669acc58ea46cf1f8b6802147a2d5528bb9
+size 243
diff --git a/openai_whisper-large-v3-v20240930_626MB/AudioEncoder.mlmodelc/coremldata.bin b/openai_whisper-large-v3-v20240930_626MB/AudioEncoder.mlmodelc/coremldata.bin
new file mode 100644
index 0000000000000000000000000000000000000000..263e0f80ccc220338a196ce78b25a1ff5242ddd9
--- /dev/null
+++ b/openai_whisper-large-v3-v20240930_626MB/AudioEncoder.mlmodelc/coremldata.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4d8eb697b6f277c3262741c03978dac85c9d74efeb56269e949d7410cb80ab84
+size 434
diff --git a/openai_whisper-large-v3-v20240930_626MB/AudioEncoder.mlmodelc/metadata.json b/openai_whisper-large-v3-v20240930_626MB/AudioEncoder.mlmodelc/metadata.json
new file mode 100644
index 0000000000000000000000000000000000000000..3953c1f6c8f9566a3fd2b544ad6a25b4c2b28768
--- /dev/null
+++ b/openai_whisper-large-v3-v20240930_626MB/AudioEncoder.mlmodelc/metadata.json
@@ -0,0 +1,92 @@
+[
+  {
+    "metadataOutputVersion" : "3.0",
+    "storagePrecision" : "Mixed (Float16, Palettized (4 bits), Sparse)",
+    "outputSchema" : [
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1 × 1280 × 1 × 1500)",
+        "shortDescription" : "",
+        "shape" : "[1, 1280, 1, 1500]",
+        "name" : "encoder_output_embeds",
+        "type" : "MultiArray"
+      },
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 4 × 1280 × 1 × 1536)",
+        "shortDescription" : "",
+        "shape" : "[4, 1280, 1, 1536]",
+        "name" : "encoder_attn_key_cache",
+        "type" : "MultiArray"
+      },
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 4 × 1280 × 1 × 1536)",
+        "shortDescription" : "",
+        "shape" : "[4, 1280, 1, 1536]",
+        "name" : "encoder_attn_value_cache",
+        "type" : "MultiArray"
+      }
+    ],
+    "modelParameters" : [
+
+    ],
+    "specificationVersion" : 9,
+    "mlProgramOperationTypeHistogram" : {
+      "Ios18.constexprLutToDense" : 202,
+      "Ios18.constexprSparseToDense" : 200,
+      "Ios18.conv" : 404,
+      "Ios18.matmul" : 64,
+      "Ios18.batchNorm" : 65,
+      "Pad" : 2,
+      "Ios18.gelu" : 34,
+      "Ios18.concat" : 2,
+      "Ios18.add" : 267,
+      "Ios18.softmax" : 32,
+      "Ios18.layerNorm" : 65,
+      "Ios18.reshape" : 128,
+      "Ios18.mul" : 32
+    },
+    "computePrecision" : "Mixed (Float16, Int32)",
+    "isUpdatable" : "0",
+    "stateSchema" : [
+
+    ],
+    "availability" : {
+      "macOS" : "15.0",
+      "tvOS" : "18.0",
+      "visionOS" : "2.0",
+      "watchOS" : "11.0",
+      "iOS" : "18.0",
+      "macCatalyst" : "18.0"
+    },
+    "modelType" : {
+      "name" : "MLModelType_mlProgram"
+    },
+    "userDefinedMetadata" : {
+      "com.github.apple.coremltools.source_dialect" : "TorchScript",
+      "com.github.apple.coremltools.source" : "torch==2.5.1",
+      "com.github.apple.coremltools.version" : "8.0"
+    },
+    "inputSchema" : [
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1 × 128 × 1 × 3000)",
+        "shortDescription" : "",
+        "shape" : "[1, 128, 1, 3000]",
+        "name" : "melspectrogram_features",
+        "type" : "MultiArray"
+      }
+    ],
+    "generatedClassName" : "AudioEncoderStateful",
+    "method" : "predict"
+  }
+]
\ No newline at end of file
diff --git a/openai_whisper-large-v3-v20240930_626MB/AudioEncoder.mlmodelc/model.mil b/openai_whisper-large-v3-v20240930_626MB/AudioEncoder.mlmodelc/model.mil
new file mode 100644
index 0000000000000000000000000000000000000000..965df545c1cfd4278835eab246e1c9bc18fc2b39
--- /dev/null
+++ b/openai_whisper-large-v3-v20240930_626MB/AudioEncoder.mlmodelc/model.mil
@@ -0,0 +1,4383 @@
+program(1.3)
+[buildInfo = dict<string, string>({{"coremlc-component-MIL", "3400.43.1"}, {"coremlc-version", "3400.58.2"}})]
+{
+    func main<ios18>(tensor<fp16, [1, 128, 1, 3000]> melspectrogram_features) {
+            string var_124_pad_type_0 = const()[name = string("op_124_pad_type_0"), val = string("custom")];
+            tensor<int32, [4]> var_124_pad_0 = const()[name = string("op_124_pad_0"), val = tensor<int32, [4]>([0, 0, 1, 1])];
+            tensor<int32, [2]> var_124_strides_0 = const()[name = string("op_124_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_124_dilations_0 = const()[name = string("op_124_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_124_groups_0 = const()[name = string("op_124_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 128, 1, 3]> var_93_to_fp16 = const()[name = string("op_93_to_fp16"), val = tensor<fp16, [1280, 128, 1, 3]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64)))];
+            tensor<fp16, [1280]> var_105_to_fp16 = const()[name = string("op_105_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(983168)))];
+            tensor<fp16, [1, 1280, 1, 3000]> var_124_cast_fp16 = conv(bias = var_105_to_fp16, dilations = var_124_dilations_0, groups = var_124_groups_0, pad = var_124_pad_0, pad_type = var_124_pad_type_0, strides = var_124_strides_0, weight = var_93_to_fp16, x = melspectrogram_features)[name = string("op_124_cast_fp16")];
+            string var_162_pad_type_0 = const()[name = string("op_162_pad_type_0"), val = string("custom")];
+            tensor<int32, [4]> var_162_pad_0 = const()[name = string("op_162_pad_0"), val = tensor<int32, [4]>([0, 0, 1, 1])];
+            tensor<int32, [2]> var_162_strides_0 = const()[name = string("op_162_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_162_dilations_0 = const()[name = string("op_162_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_162_groups_0 = const()[name = string("op_162_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 128, 1, 3]> op_137_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 128, 1, 3]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(985792))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1231616))))[name = string("op_137_to_fp16_palettized")];
+            tensor<fp16, [1280]> var_143_to_fp16 = const()[name = string("op_143_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1231744)))];
+            tensor<fp16, [1, 1280, 1, 3000]> var_162_cast_fp16 = conv(bias = var_143_to_fp16, dilations = var_162_dilations_0, groups = var_162_groups_0, pad = var_162_pad_0, pad_type = var_162_pad_type_0, strides = var_162_strides_0, weight = op_137_to_fp16_palettized, x = melspectrogram_features)[name = string("op_162_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 3000]> var_164_cast_fp16 = add(x = var_124_cast_fp16, y = var_162_cast_fp16)[name = string("op_164_cast_fp16")];
+            string hidden_states_1_mode_0 = const()[name = string("hidden_states_1_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 1280, 1, 3000]> hidden_states_1_cast_fp16 = gelu(mode = hidden_states_1_mode_0, x = var_164_cast_fp16)[name = string("hidden_states_1_cast_fp16")];
+            string var_210_pad_type_0 = const()[name = string("op_210_pad_type_0"), val = string("custom")];
+            tensor<int32, [4]> var_210_pad_0 = const()[name = string("op_210_pad_0"), val = tensor<int32, [4]>([0, 0, 1, 1])];
+            tensor<int32, [2]> var_210_strides_0 = const()[name = string("op_210_strides_0"), val = tensor<int32, [2]>([2, 2])];
+            tensor<int32, [2]> var_210_dilations_0 = const()[name = string("op_210_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_210_groups_0 = const()[name = string("op_210_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 3]> var_179_to_fp16 = const()[name = string("op_179_to_fp16"), val = tensor<fp16, [1280, 1280, 1, 3]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1234368)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_210_cast_fp16 = conv(bias = var_105_to_fp16, dilations = var_210_dilations_0, groups = var_210_groups_0, pad = var_210_pad_0, pad_type = var_210_pad_type_0, strides = var_210_strides_0, weight = var_179_to_fp16, x = hidden_states_1_cast_fp16)[name = string("op_210_cast_fp16")];
+            string var_248_pad_type_0 = const()[name = string("op_248_pad_type_0"), val = string("custom")];
+            tensor<int32, [4]> var_248_pad_0 = const()[name = string("op_248_pad_0"), val = tensor<int32, [4]>([0, 0, 1, 1])];
+            tensor<int32, [2]> var_248_strides_0 = const()[name = string("op_248_strides_0"), val = tensor<int32, [2]>([2, 2])];
+            tensor<int32, [2]> var_248_dilations_0 = const()[name = string("op_248_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_248_groups_0 = const()[name = string("op_248_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 3]> op_223_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 3]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(11064832))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(13522496))))[name = string("op_223_to_fp16_palettized")];
+            tensor<fp16, [1280]> var_229_to_fp16 = const()[name = string("op_229_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(13522624)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_248_cast_fp16 = conv(bias = var_229_to_fp16, dilations = var_248_dilations_0, groups = var_248_groups_0, pad = var_248_pad_0, pad_type = var_248_pad_type_0, strides = var_248_strides_0, weight = op_223_to_fp16_palettized, x = hidden_states_1_cast_fp16)[name = string("op_248_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_250_cast_fp16 = add(x = var_210_cast_fp16, y = var_248_cast_fp16)[name = string("op_250_cast_fp16")];
+            string hidden_states_3_mode_0 = const()[name = string("hidden_states_3_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 1280, 1, 1500]> hidden_states_3_cast_fp16 = gelu(mode = hidden_states_3_mode_0, x = var_250_cast_fp16)[name = string("hidden_states_3_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_270_to_fp16 = const()[name = string("op_270_to_fp16"), val = tensor<fp16, [1, 1280, 1, 1500]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(13525248)))];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_1_cast_fp16 = add(x = hidden_states_3_cast_fp16, y = var_270_to_fp16)[name = string("inputs_1_cast_fp16")];
+            int32 var_284 = const()[name = string("op_284"), val = int32(3)];
+            tensor<int32, [1]> out_1_axes_0 = const()[name = string("out_1_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_303_to_fp16 = const()[name = string("op_303_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> out_1_cast_fp16 = layer_norm(axes = out_1_axes_0, epsilon = var_303_to_fp16, x = inputs_1_cast_fp16)[name = string("out_1_cast_fp16")];
+            tensor<fp16, [1280]> obj_1_variance_0_to_fp16 = const()[name = string("obj_1_variance_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(17365312)))];
+            tensor<fp16, [1280]> obj_1_gamma_0_to_fp16 = const()[name = string("obj_1_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(17367936)))];
+            tensor<fp16, [1280]> obj_1_beta_0_to_fp16 = const()[name = string("obj_1_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(17370560)))];
+            fp16 obj_1_epsilon_0_to_fp16 = const()[name = string("obj_1_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> obj_1_cast_fp16 = batch_norm(beta = obj_1_beta_0_to_fp16, epsilon = obj_1_epsilon_0_to_fp16, gamma = obj_1_gamma_0_to_fp16, mean = var_105_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_1_cast_fp16)[name = string("obj_1_cast_fp16")];
+            string var_325_pad_type_0 = const()[name = string("op_325_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_325_strides_0 = const()[name = string("op_325_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_325_pad_0 = const()[name = string("op_325_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_325_dilations_0 = const()[name = string("op_325_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_325_groups_0 = const()[name = string("op_325_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_0_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(17373184))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(18192448))))[name = string("layers_0_self_attn_q_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1280]> layers_0_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_0_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(18192576)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_325_cast_fp16 = conv(bias = layers_0_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_325_dilations_0, groups = var_325_groups_0, pad = var_325_pad_0, pad_type = var_325_pad_type_0, strides = var_325_strides_0, weight = layers_0_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_1_cast_fp16)[name = string("op_325_cast_fp16")];
+            string var_331_pad_type_0 = const()[name = string("op_331_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_331_strides_0 = const()[name = string("op_331_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_331_pad_0 = const()[name = string("op_331_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_331_dilations_0 = const()[name = string("op_331_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_331_groups_0 = const()[name = string("op_331_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_0_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(18275392))), nonzero_data = tensor<fp16, [40058]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(18195200))))[name = string("layers_0_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_331_cast_fp16 = conv(dilations = var_331_dilations_0, groups = var_331_groups_0, pad = var_331_pad_0, pad_type = var_331_pad_type_0, strides = var_331_strides_0, weight = layers_0_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_1_cast_fp16)[name = string("op_331_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> query_1_cast_fp16 = add(x = var_325_cast_fp16, y = var_331_cast_fp16)[name = string("query_1_cast_fp16")];
+            string var_340_pad_type_0 = const()[name = string("op_340_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_340_strides_0 = const()[name = string("op_340_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_340_pad_0 = const()[name = string("op_340_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_340_dilations_0 = const()[name = string("op_340_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_340_groups_0 = const()[name = string("op_340_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_0_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(18480256))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(19299520))))[name = string("layers_0_self_attn_k_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_340_cast_fp16 = conv(dilations = var_340_dilations_0, groups = var_340_groups_0, pad = var_340_pad_0, pad_type = var_340_pad_type_0, strides = var_340_strides_0, weight = layers_0_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_1_cast_fp16)[name = string("op_340_cast_fp16")];
+            string var_346_pad_type_0 = const()[name = string("op_346_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_346_strides_0 = const()[name = string("op_346_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_346_pad_0 = const()[name = string("op_346_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_346_dilations_0 = const()[name = string("op_346_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_346_groups_0 = const()[name = string("op_346_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_0_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(19363520))), nonzero_data = tensor<fp16, [31892]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(19299648))))[name = string("layers_0_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_346_cast_fp16 = conv(dilations = var_346_dilations_0, groups = var_346_groups_0, pad = var_346_pad_0, pad_type = var_346_pad_type_0, strides = var_346_strides_0, weight = layers_0_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_1_cast_fp16)[name = string("op_346_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> key_1_cast_fp16 = add(x = var_340_cast_fp16, y = var_346_cast_fp16)[name = string("key_1_cast_fp16")];
+            string var_356_pad_type_0 = const()[name = string("op_356_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_356_strides_0 = const()[name = string("op_356_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_356_pad_0 = const()[name = string("op_356_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_356_dilations_0 = const()[name = string("op_356_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_356_groups_0 = const()[name = string("op_356_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_0_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(19568384))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(20387648))))[name = string("layers_0_self_attn_v_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1280]> layers_0_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_0_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(20387776)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_356_cast_fp16 = conv(bias = layers_0_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_356_dilations_0, groups = var_356_groups_0, pad = var_356_pad_0, pad_type = var_356_pad_type_0, strides = var_356_strides_0, weight = layers_0_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_1_cast_fp16)[name = string("op_356_cast_fp16")];
+            string var_362_pad_type_0 = const()[name = string("op_362_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_362_strides_0 = const()[name = string("op_362_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_362_pad_0 = const()[name = string("op_362_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_362_dilations_0 = const()[name = string("op_362_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_362_groups_0 = const()[name = string("op_362_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_0_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(20474048))), nonzero_data = tensor<fp16, [41792]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(20390400))))[name = string("layers_0_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_362_cast_fp16 = conv(dilations = var_362_dilations_0, groups = var_362_groups_0, pad = var_362_pad_0, pad_type = var_362_pad_type_0, strides = var_362_strides_0, weight = layers_0_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_1_cast_fp16)[name = string("op_362_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> value_1_cast_fp16 = add(x = var_356_cast_fp16, y = var_362_cast_fp16)[name = string("value_1_cast_fp16")];
+            tensor<int32, [4]> var_365 = const()[name = string("op_365"), val = tensor<int32, [4]>([1, 20, 64, -1])];
+            tensor<fp16, [1, 20, 64, 1500]> mh_q_1_cast_fp16 = reshape(shape = var_365, x = query_1_cast_fp16)[name = string("mh_q_1_cast_fp16")];
+            fp16 var_367_to_fp16 = const()[name = string("op_367_to_fp16"), val = fp16(0x1p-3)];
+            tensor<fp16, [1, 20, 64, 1500]> var_368_cast_fp16 = mul(x = mh_q_1_cast_fp16, y = var_367_to_fp16)[name = string("op_368_cast_fp16")];
+            tensor<int32, [4]> var_369 = const()[name = string("op_369"), val = tensor<int32, [4]>([1, 20, 64, -1])];
+            tensor<fp16, [1, 20, 64, 1500]> var_370_cast_fp16 = reshape(shape = var_369, x = key_1_cast_fp16)[name = string("op_370_cast_fp16")];
+            bool mh_w_1_transpose_x_0 = const()[name = string("mh_w_1_transpose_x_0"), val = bool(true)];
+            bool mh_w_1_transpose_y_0 = const()[name = string("mh_w_1_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, 1500, 1500]> mh_w_1_cast_fp16 = matmul(transpose_x = mh_w_1_transpose_x_0, transpose_y = mh_w_1_transpose_y_0, x = var_368_cast_fp16, y = var_370_cast_fp16)[name = string("mh_w_1_cast_fp16")];
+            tensor<fp16, [1, 20, 1500, 1500]> var_373_cast_fp16 = softmax(axis = var_284, x = mh_w_1_cast_fp16)[name = string("op_373_cast_fp16")];
+            tensor<int32, [4]> var_374 = const()[name = string("op_374"), val = tensor<int32, [4]>([1, 20, 64, -1])];
+            tensor<fp16, [1, 20, 64, 1500]> var_375_cast_fp16 = reshape(shape = var_374, x = value_1_cast_fp16)[name = string("op_375_cast_fp16")];
+            bool attn_1_transpose_x_0 = const()[name = string("attn_1_transpose_x_0"), val = bool(false)];
+            bool attn_1_transpose_y_0 = const()[name = string("attn_1_transpose_y_0"), val = bool(true)];
+            tensor<fp16, [1, 20, 64, 1500]> attn_1_cast_fp16 = matmul(transpose_x = attn_1_transpose_x_0, transpose_y = attn_1_transpose_y_0, x = var_375_cast_fp16, y = var_373_cast_fp16)[name = string("attn_1_cast_fp16")];
+            tensor<int32, [4]> var_378 = const()[name = string("op_378"), val = tensor<int32, [4]>([1, 1280, 1, -1])];
+            tensor<fp16, [1, 1280, 1, 1500]> input_1_cast_fp16 = reshape(shape = var_378, x = attn_1_cast_fp16)[name = string("input_1_cast_fp16")];
+            string var_388_pad_type_0 = const()[name = string("op_388_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_388_strides_0 = const()[name = string("op_388_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_388_pad_0 = const()[name = string("op_388_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_388_dilations_0 = const()[name = string("op_388_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_388_groups_0 = const()[name = string("op_388_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_0_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(20678912))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(21498176))))[name = string("layers_0_self_attn_o_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1280]> layers_0_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_0_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(21498304)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_388_cast_fp16 = conv(bias = layers_0_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_388_dilations_0, groups = var_388_groups_0, pad = var_388_pad_0, pad_type = var_388_pad_type_0, strides = var_388_strides_0, weight = layers_0_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_1_cast_fp16)[name = string("op_388_cast_fp16")];
+            string var_394_pad_type_0 = const()[name = string("op_394_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_394_strides_0 = const()[name = string("op_394_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_394_pad_0 = const()[name = string("op_394_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_394_dilations_0 = const()[name = string("op_394_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_394_groups_0 = const()[name = string("op_394_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_0_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(21553024))), nonzero_data = tensor<fp16, [25985]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(21500928))))[name = string("layers_0_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_394_cast_fp16 = conv(dilations = var_394_dilations_0, groups = var_394_groups_0, pad = var_394_pad_0, pad_type = var_394_pad_type_0, strides = var_394_strides_0, weight = layers_0_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_1_cast_fp16)[name = string("op_394_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> obj_3_cast_fp16 = add(x = var_388_cast_fp16, y = var_394_cast_fp16)[name = string("obj_3_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_3_cast_fp16 = add(x = inputs_1_cast_fp16, y = obj_3_cast_fp16)[name = string("inputs_3_cast_fp16")];
+            tensor<int32, [1]> out_3_axes_0 = const()[name = string("out_3_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_405_to_fp16 = const()[name = string("op_405_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> out_3_cast_fp16 = layer_norm(axes = out_3_axes_0, epsilon = var_405_to_fp16, x = inputs_3_cast_fp16)[name = string("out_3_cast_fp16")];
+            tensor<fp16, [1280]> input_3_gamma_0_to_fp16 = const()[name = string("input_3_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(21757888)))];
+            tensor<fp16, [1280]> input_3_beta_0_to_fp16 = const()[name = string("input_3_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(21760512)))];
+            fp16 input_3_epsilon_0_to_fp16 = const()[name = string("input_3_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_3_cast_fp16 = batch_norm(beta = input_3_beta_0_to_fp16, epsilon = input_3_epsilon_0_to_fp16, gamma = input_3_gamma_0_to_fp16, mean = var_105_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_3_cast_fp16)[name = string("input_3_cast_fp16")];
+            string var_423_pad_type_0 = const()[name = string("op_423_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_423_strides_0 = const()[name = string("op_423_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_423_pad_0 = const()[name = string("op_423_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_423_dilations_0 = const()[name = string("op_423_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_423_groups_0 = const()[name = string("op_423_groups_0"), val = int32(1)];
+            tensor<fp16, [5120, 1280, 1, 1]> layers_0_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [5120, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(21763136))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(25040000))))[name = string("layers_0_fc1_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [5120]> layers_0_fc1_inlier_module_bias_to_fp16 = const()[name = string("layers_0_fc1_inlier_module_bias_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(25040128)))];
+            tensor<fp16, [1, 5120, 1, 1500]> var_423_cast_fp16 = conv(bias = layers_0_fc1_inlier_module_bias_to_fp16, dilations = var_423_dilations_0, groups = var_423_groups_0, pad = var_423_pad_0, pad_type = var_423_pad_type_0, strides = var_423_strides_0, weight = layers_0_fc1_inlier_module_weight_to_fp16_palettized, x = input_3_cast_fp16)[name = string("op_423_cast_fp16")];
+            string var_429_pad_type_0 = const()[name = string("op_429_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_429_strides_0 = const()[name = string("op_429_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_429_pad_0 = const()[name = string("op_429_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_429_dilations_0 = const()[name = string("op_429_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_429_groups_0 = const()[name = string("op_429_groups_0"), val = int32(1)];
+            tensor<fp16, [5120, 1280, 1, 1]> layers_0_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [5120, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(25353152))), nonzero_data = tensor<fp16, [151326]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(25050432))))[name = string("layers_0_fc1_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 5120, 1, 1500]> var_429_cast_fp16 = conv(dilations = var_429_dilations_0, groups = var_429_groups_0, pad = var_429_pad_0, pad_type = var_429_pad_type_0, strides = var_429_strides_0, weight = layers_0_fc1_outlier_module_weight_to_fp16_sparsified, x = input_3_cast_fp16)[name = string("op_429_cast_fp16")];
+            tensor<fp16, [1, 5120, 1, 1500]> input_5_cast_fp16 = add(x = var_423_cast_fp16, y = var_429_cast_fp16)[name = string("input_5_cast_fp16")];
+            string input_7_mode_0 = const()[name = string("input_7_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 5120, 1, 1500]> input_7_cast_fp16 = gelu(mode = input_7_mode_0, x = input_5_cast_fp16)[name = string("input_7_cast_fp16")];
+            string var_440_pad_type_0 = const()[name = string("op_440_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_440_strides_0 = const()[name = string("op_440_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_440_pad_0 = const()[name = string("op_440_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_440_dilations_0 = const()[name = string("op_440_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_440_groups_0 = const()[name = string("op_440_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 5120, 1, 1]> layers_0_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 5120, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(26172416))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(29449280))))[name = string("layers_0_fc2_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1280]> layers_0_fc2_inlier_module_bias_to_fp16 = const()[name = string("layers_0_fc2_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(29449408)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_440_cast_fp16 = conv(bias = layers_0_fc2_inlier_module_bias_to_fp16, dilations = var_440_dilations_0, groups = var_440_groups_0, pad = var_440_pad_0, pad_type = var_440_pad_type_0, strides = var_440_strides_0, weight = layers_0_fc2_inlier_module_weight_to_fp16_palettized, x = input_7_cast_fp16)[name = string("op_440_cast_fp16")];
+            string var_446_pad_type_0 = const()[name = string("op_446_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_446_strides_0 = const()[name = string("op_446_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_446_pad_0 = const()[name = string("op_446_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_446_dilations_0 = const()[name = string("op_446_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_446_groups_0 = const()[name = string("op_446_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 5120, 1, 1]> layers_0_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 5120, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(29689088))), nonzero_data = tensor<fp16, [118467]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(29452032))))[name = string("layers_0_fc2_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_446_cast_fp16 = conv(dilations = var_446_dilations_0, groups = var_446_groups_0, pad = var_446_pad_0, pad_type = var_446_pad_type_0, strides = var_446_strides_0, weight = layers_0_fc2_outlier_module_weight_to_fp16_sparsified, x = input_7_cast_fp16)[name = string("op_446_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> hidden_states_5_cast_fp16 = add(x = var_440_cast_fp16, y = var_446_cast_fp16)[name = string("hidden_states_5_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_5_cast_fp16 = add(x = inputs_3_cast_fp16, y = hidden_states_5_cast_fp16)[name = string("inputs_5_cast_fp16")];
+            int32 var_456 = const()[name = string("op_456"), val = int32(3)];
+            tensor<int32, [1]> out_5_axes_0 = const()[name = string("out_5_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_475_to_fp16 = const()[name = string("op_475_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> out_5_cast_fp16 = layer_norm(axes = out_5_axes_0, epsilon = var_475_to_fp16, x = inputs_5_cast_fp16)[name = string("out_5_cast_fp16")];
+            tensor<fp16, [1280]> obj_5_gamma_0_to_fp16 = const()[name = string("obj_5_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(30508352)))];
+            tensor<fp16, [1280]> obj_5_beta_0_to_fp16 = const()[name = string("obj_5_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(30510976)))];
+            fp16 obj_5_epsilon_0_to_fp16 = const()[name = string("obj_5_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> obj_5_cast_fp16 = batch_norm(beta = obj_5_beta_0_to_fp16, epsilon = obj_5_epsilon_0_to_fp16, gamma = obj_5_gamma_0_to_fp16, mean = var_105_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_5_cast_fp16)[name = string("obj_5_cast_fp16")];
+            string var_497_pad_type_0 = const()[name = string("op_497_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_497_strides_0 = const()[name = string("op_497_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_497_pad_0 = const()[name = string("op_497_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_497_dilations_0 = const()[name = string("op_497_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_497_groups_0 = const()[name = string("op_497_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_1_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(30513600))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(31332864))))[name = string("layers_1_self_attn_q_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1280]> layers_1_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_1_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(31332992)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_497_cast_fp16 = conv(bias = layers_1_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_497_dilations_0, groups = var_497_groups_0, pad = var_497_pad_0, pad_type = var_497_pad_type_0, strides = var_497_strides_0, weight = layers_1_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_5_cast_fp16)[name = string("op_497_cast_fp16")];
+            string var_503_pad_type_0 = const()[name = string("op_503_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_503_strides_0 = const()[name = string("op_503_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_503_pad_0 = const()[name = string("op_503_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_503_dilations_0 = const()[name = string("op_503_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_503_groups_0 = const()[name = string("op_503_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_1_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(31399552))), nonzero_data = tensor<fp16, [31911]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(31335616))))[name = string("layers_1_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_503_cast_fp16 = conv(dilations = var_503_dilations_0, groups = var_503_groups_0, pad = var_503_pad_0, pad_type = var_503_pad_type_0, strides = var_503_strides_0, weight = layers_1_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_5_cast_fp16)[name = string("op_503_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> query_3_cast_fp16 = add(x = var_497_cast_fp16, y = var_503_cast_fp16)[name = string("query_3_cast_fp16")];
+            string var_512_pad_type_0 = const()[name = string("op_512_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_512_strides_0 = const()[name = string("op_512_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_512_pad_0 = const()[name = string("op_512_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_512_dilations_0 = const()[name = string("op_512_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_512_groups_0 = const()[name = string("op_512_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_1_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(31604416))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(32423680))))[name = string("layers_1_self_attn_k_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_512_cast_fp16 = conv(dilations = var_512_dilations_0, groups = var_512_groups_0, pad = var_512_pad_0, pad_type = var_512_pad_type_0, strides = var_512_strides_0, weight = layers_1_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_5_cast_fp16)[name = string("op_512_cast_fp16")];
+            string var_518_pad_type_0 = const()[name = string("op_518_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_518_strides_0 = const()[name = string("op_518_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_518_pad_0 = const()[name = string("op_518_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_518_dilations_0 = const()[name = string("op_518_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_518_groups_0 = const()[name = string("op_518_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_1_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(32475264))), nonzero_data = tensor<fp16, [25674]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(32423808))))[name = string("layers_1_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_518_cast_fp16 = conv(dilations = var_518_dilations_0, groups = var_518_groups_0, pad = var_518_pad_0, pad_type = var_518_pad_type_0, strides = var_518_strides_0, weight = layers_1_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_5_cast_fp16)[name = string("op_518_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> key_3_cast_fp16 = add(x = var_512_cast_fp16, y = var_518_cast_fp16)[name = string("key_3_cast_fp16")];
+            string var_528_pad_type_0 = const()[name = string("op_528_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_528_strides_0 = const()[name = string("op_528_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_528_pad_0 = const()[name = string("op_528_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_528_dilations_0 = const()[name = string("op_528_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_528_groups_0 = const()[name = string("op_528_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_1_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(32680128))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(33499392))))[name = string("layers_1_self_attn_v_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1280]> layers_1_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_1_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(33499520)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_528_cast_fp16 = conv(bias = layers_1_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_528_dilations_0, groups = var_528_groups_0, pad = var_528_pad_0, pad_type = var_528_pad_type_0, strides = var_528_strides_0, weight = layers_1_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_5_cast_fp16)[name = string("op_528_cast_fp16")];
+            string var_534_pad_type_0 = const()[name = string("op_534_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_534_strides_0 = const()[name = string("op_534_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_534_pad_0 = const()[name = string("op_534_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_534_dilations_0 = const()[name = string("op_534_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_534_groups_0 = const()[name = string("op_534_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_1_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(33549632))), nonzero_data = tensor<fp16, [23687]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(33502144))))[name = string("layers_1_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_534_cast_fp16 = conv(dilations = var_534_dilations_0, groups = var_534_groups_0, pad = var_534_pad_0, pad_type = var_534_pad_type_0, strides = var_534_strides_0, weight = layers_1_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_5_cast_fp16)[name = string("op_534_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> value_3_cast_fp16 = add(x = var_528_cast_fp16, y = var_534_cast_fp16)[name = string("value_3_cast_fp16")];
+            tensor<int32, [4]> var_537 = const()[name = string("op_537"), val = tensor<int32, [4]>([1, 20, 64, -1])];
+            tensor<fp16, [1, 20, 64, 1500]> mh_q_3_cast_fp16 = reshape(shape = var_537, x = query_3_cast_fp16)[name = string("mh_q_3_cast_fp16")];
+            fp16 var_539_to_fp16 = const()[name = string("op_539_to_fp16"), val = fp16(0x1p-3)];
+            tensor<fp16, [1, 20, 64, 1500]> var_540_cast_fp16 = mul(x = mh_q_3_cast_fp16, y = var_539_to_fp16)[name = string("op_540_cast_fp16")];
+            tensor<int32, [4]> var_541 = const()[name = string("op_541"), val = tensor<int32, [4]>([1, 20, 64, -1])];
+            tensor<fp16, [1, 20, 64, 1500]> var_542_cast_fp16 = reshape(shape = var_541, x = key_3_cast_fp16)[name = string("op_542_cast_fp16")];
+            bool mh_w_3_transpose_x_0 = const()[name = string("mh_w_3_transpose_x_0"), val = bool(true)];
+            bool mh_w_3_transpose_y_0 = const()[name = string("mh_w_3_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, 1500, 1500]> mh_w_3_cast_fp16 = matmul(transpose_x = mh_w_3_transpose_x_0, transpose_y = mh_w_3_transpose_y_0, x = var_540_cast_fp16, y = var_542_cast_fp16)[name = string("mh_w_3_cast_fp16")];
+            tensor<fp16, [1, 20, 1500, 1500]> var_545_cast_fp16 = softmax(axis = var_456, x = mh_w_3_cast_fp16)[name = string("op_545_cast_fp16")];
+            tensor<int32, [4]> var_546 = const()[name = string("op_546"), val = tensor<int32, [4]>([1, 20, 64, -1])];
+            tensor<fp16, [1, 20, 64, 1500]> var_547_cast_fp16 = reshape(shape = var_546, x = value_3_cast_fp16)[name = string("op_547_cast_fp16")];
+            bool attn_3_transpose_x_0 = const()[name = string("attn_3_transpose_x_0"), val = bool(false)];
+            bool attn_3_transpose_y_0 = const()[name = string("attn_3_transpose_y_0"), val = bool(true)];
+            tensor<fp16, [1, 20, 64, 1500]> attn_3_cast_fp16 = matmul(transpose_x = attn_3_transpose_x_0, transpose_y = attn_3_transpose_y_0, x = var_547_cast_fp16, y = var_545_cast_fp16)[name = string("attn_3_cast_fp16")];
+            tensor<int32, [4]> var_550 = const()[name = string("op_550"), val = tensor<int32, [4]>([1, 1280, 1, -1])];
+            tensor<fp16, [1, 1280, 1, 1500]> input_9_cast_fp16 = reshape(shape = var_550, x = attn_3_cast_fp16)[name = string("input_9_cast_fp16")];
+            string var_560_pad_type_0 = const()[name = string("op_560_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_560_strides_0 = const()[name = string("op_560_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_560_pad_0 = const()[name = string("op_560_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_560_dilations_0 = const()[name = string("op_560_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_560_groups_0 = const()[name = string("op_560_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_1_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(33754496))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(34573760))))[name = string("layers_1_self_attn_o_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1280]> layers_1_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_1_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(34573888)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_560_cast_fp16 = conv(bias = layers_1_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_560_dilations_0, groups = var_560_groups_0, pad = var_560_pad_0, pad_type = var_560_pad_type_0, strides = var_560_strides_0, weight = layers_1_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_9_cast_fp16)[name = string("op_560_cast_fp16")];
+            string var_566_pad_type_0 = const()[name = string("op_566_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_566_strides_0 = const()[name = string("op_566_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_566_pad_0 = const()[name = string("op_566_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_566_dilations_0 = const()[name = string("op_566_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_566_groups_0 = const()[name = string("op_566_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_1_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(34612736))), nonzero_data = tensor<fp16, [18054]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(34576512))))[name = string("layers_1_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_566_cast_fp16 = conv(dilations = var_566_dilations_0, groups = var_566_groups_0, pad = var_566_pad_0, pad_type = var_566_pad_type_0, strides = var_566_strides_0, weight = layers_1_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_9_cast_fp16)[name = string("op_566_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> obj_7_cast_fp16 = add(x = var_560_cast_fp16, y = var_566_cast_fp16)[name = string("obj_7_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_7_cast_fp16 = add(x = inputs_5_cast_fp16, y = obj_7_cast_fp16)[name = string("inputs_7_cast_fp16")];
+            tensor<int32, [1]> out_7_axes_0 = const()[name = string("out_7_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_577_to_fp16 = const()[name = string("op_577_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> out_7_cast_fp16 = layer_norm(axes = out_7_axes_0, epsilon = var_577_to_fp16, x = inputs_7_cast_fp16)[name = string("out_7_cast_fp16")];
+            tensor<fp16, [1280]> input_11_gamma_0_to_fp16 = const()[name = string("input_11_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(34817600)))];
+            tensor<fp16, [1280]> input_11_beta_0_to_fp16 = const()[name = string("input_11_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(34820224)))];
+            fp16 input_11_epsilon_0_to_fp16 = const()[name = string("input_11_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_11_cast_fp16 = batch_norm(beta = input_11_beta_0_to_fp16, epsilon = input_11_epsilon_0_to_fp16, gamma = input_11_gamma_0_to_fp16, mean = var_105_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_7_cast_fp16)[name = string("input_11_cast_fp16")];
+            string var_595_pad_type_0 = const()[name = string("op_595_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_595_strides_0 = const()[name = string("op_595_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_595_pad_0 = const()[name = string("op_595_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_595_dilations_0 = const()[name = string("op_595_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_595_groups_0 = const()[name = string("op_595_groups_0"), val = int32(1)];
+            tensor<fp16, [5120, 1280, 1, 1]> layers_1_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [5120, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(34822848))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(38099712))))[name = string("layers_1_fc1_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [5120]> layers_1_fc1_inlier_module_bias_to_fp16 = const()[name = string("layers_1_fc1_inlier_module_bias_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(38099840)))];
+            tensor<fp16, [1, 5120, 1, 1500]> var_595_cast_fp16 = conv(bias = layers_1_fc1_inlier_module_bias_to_fp16, dilations = var_595_dilations_0, groups = var_595_groups_0, pad = var_595_pad_0, pad_type = var_595_pad_type_0, strides = var_595_strides_0, weight = layers_1_fc1_inlier_module_weight_to_fp16_palettized, x = input_11_cast_fp16)[name = string("op_595_cast_fp16")];
+            string var_601_pad_type_0 = const()[name = string("op_601_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_601_strides_0 = const()[name = string("op_601_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_601_pad_0 = const()[name = string("op_601_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_601_dilations_0 = const()[name = string("op_601_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_601_groups_0 = const()[name = string("op_601_groups_0"), val = int32(1)];
+            tensor<fp16, [5120, 1280, 1, 1]> layers_1_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [5120, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(38256320))), nonzero_data = tensor<fp16, [73044]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(38110144))))[name = string("layers_1_fc1_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 5120, 1, 1500]> var_601_cast_fp16 = conv(dilations = var_601_dilations_0, groups = var_601_groups_0, pad = var_601_pad_0, pad_type = var_601_pad_type_0, strides = var_601_strides_0, weight = layers_1_fc1_outlier_module_weight_to_fp16_sparsified, x = input_11_cast_fp16)[name = string("op_601_cast_fp16")];
+            tensor<fp16, [1, 5120, 1, 1500]> input_13_cast_fp16 = add(x = var_595_cast_fp16, y = var_601_cast_fp16)[name = string("input_13_cast_fp16")];
+            string input_15_mode_0 = const()[name = string("input_15_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 5120, 1, 1500]> input_15_cast_fp16 = gelu(mode = input_15_mode_0, x = input_13_cast_fp16)[name = string("input_15_cast_fp16")];
+            string var_612_pad_type_0 = const()[name = string("op_612_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_612_strides_0 = const()[name = string("op_612_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_612_pad_0 = const()[name = string("op_612_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_612_dilations_0 = const()[name = string("op_612_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_612_groups_0 = const()[name = string("op_612_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 5120, 1, 1]> layers_1_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 5120, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(39075584))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(42352448))))[name = string("layers_1_fc2_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1280]> layers_1_fc2_inlier_module_bias_to_fp16 = const()[name = string("layers_1_fc2_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(42352576)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_612_cast_fp16 = conv(bias = layers_1_fc2_inlier_module_bias_to_fp16, dilations = var_612_dilations_0, groups = var_612_groups_0, pad = var_612_pad_0, pad_type = var_612_pad_type_0, strides = var_612_strides_0, weight = layers_1_fc2_inlier_module_weight_to_fp16_palettized, x = input_15_cast_fp16)[name = string("op_612_cast_fp16")];
+            string var_618_pad_type_0 = const()[name = string("op_618_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_618_strides_0 = const()[name = string("op_618_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_618_pad_0 = const()[name = string("op_618_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_618_dilations_0 = const()[name = string("op_618_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_618_groups_0 = const()[name = string("op_618_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 5120, 1, 1]> layers_1_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 5120, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(42567552))), nonzero_data = tensor<fp16, [106134]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(42355200))))[name = string("layers_1_fc2_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_618_cast_fp16 = conv(dilations = var_618_dilations_0, groups = var_618_groups_0, pad = var_618_pad_0, pad_type = var_618_pad_type_0, strides = var_618_strides_0, weight = layers_1_fc2_outlier_module_weight_to_fp16_sparsified, x = input_15_cast_fp16)[name = string("op_618_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> hidden_states_7_cast_fp16 = add(x = var_612_cast_fp16, y = var_618_cast_fp16)[name = string("hidden_states_7_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_9_cast_fp16 = add(x = inputs_7_cast_fp16, y = hidden_states_7_cast_fp16)[name = string("inputs_9_cast_fp16")];
+            int32 var_628 = const()[name = string("op_628"), val = int32(3)];
+            tensor<int32, [1]> out_9_axes_0 = const()[name = string("out_9_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_647_to_fp16 = const()[name = string("op_647_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> out_9_cast_fp16 = layer_norm(axes = out_9_axes_0, epsilon = var_647_to_fp16, x = inputs_9_cast_fp16)[name = string("out_9_cast_fp16")];
+            tensor<fp16, [1280]> obj_9_gamma_0_to_fp16 = const()[name = string("obj_9_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(43386816)))];
+            tensor<fp16, [1280]> obj_9_beta_0_to_fp16 = const()[name = string("obj_9_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(43389440)))];
+            fp16 obj_9_epsilon_0_to_fp16 = const()[name = string("obj_9_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> obj_9_cast_fp16 = batch_norm(beta = obj_9_beta_0_to_fp16, epsilon = obj_9_epsilon_0_to_fp16, gamma = obj_9_gamma_0_to_fp16, mean = var_105_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_9_cast_fp16)[name = string("obj_9_cast_fp16")];
+            string var_669_pad_type_0 = const()[name = string("op_669_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_669_strides_0 = const()[name = string("op_669_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_669_pad_0 = const()[name = string("op_669_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_669_dilations_0 = const()[name = string("op_669_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_669_groups_0 = const()[name = string("op_669_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_2_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(43392064))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(44211328))))[name = string("layers_2_self_attn_q_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1280]> layers_2_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_2_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(44211456)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_669_cast_fp16 = conv(bias = layers_2_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_669_dilations_0, groups = var_669_groups_0, pad = var_669_pad_0, pad_type = var_669_pad_type_0, strides = var_669_strides_0, weight = layers_2_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_9_cast_fp16)[name = string("op_669_cast_fp16")];
+            string var_675_pad_type_0 = const()[name = string("op_675_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_675_strides_0 = const()[name = string("op_675_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_675_pad_0 = const()[name = string("op_675_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_675_dilations_0 = const()[name = string("op_675_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_675_groups_0 = const()[name = string("op_675_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_2_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(44271552))), nonzero_data = tensor<fp16, [28690]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(44214080))))[name = string("layers_2_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_675_cast_fp16 = conv(dilations = var_675_dilations_0, groups = var_675_groups_0, pad = var_675_pad_0, pad_type = var_675_pad_type_0, strides = var_675_strides_0, weight = layers_2_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_9_cast_fp16)[name = string("op_675_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> query_5_cast_fp16 = add(x = var_669_cast_fp16, y = var_675_cast_fp16)[name = string("query_5_cast_fp16")];
+            string var_684_pad_type_0 = const()[name = string("op_684_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_684_strides_0 = const()[name = string("op_684_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_684_pad_0 = const()[name = string("op_684_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_684_dilations_0 = const()[name = string("op_684_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_684_groups_0 = const()[name = string("op_684_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_2_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(44476416))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(45295680))))[name = string("layers_2_self_attn_k_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_684_cast_fp16 = conv(dilations = var_684_dilations_0, groups = var_684_groups_0, pad = var_684_pad_0, pad_type = var_684_pad_type_0, strides = var_684_strides_0, weight = layers_2_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_9_cast_fp16)[name = string("op_684_cast_fp16")];
+            string var_690_pad_type_0 = const()[name = string("op_690_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_690_strides_0 = const()[name = string("op_690_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_690_pad_0 = const()[name = string("op_690_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_690_dilations_0 = const()[name = string("op_690_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_690_groups_0 = const()[name = string("op_690_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_2_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(45344832))), nonzero_data = tensor<fp16, [24459]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(45295808))))[name = string("layers_2_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_690_cast_fp16 = conv(dilations = var_690_dilations_0, groups = var_690_groups_0, pad = var_690_pad_0, pad_type = var_690_pad_type_0, strides = var_690_strides_0, weight = layers_2_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_9_cast_fp16)[name = string("op_690_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> key_5_cast_fp16 = add(x = var_684_cast_fp16, y = var_690_cast_fp16)[name = string("key_5_cast_fp16")];
+            string var_700_pad_type_0 = const()[name = string("op_700_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_700_strides_0 = const()[name = string("op_700_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_700_pad_0 = const()[name = string("op_700_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_700_dilations_0 = const()[name = string("op_700_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_700_groups_0 = const()[name = string("op_700_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_2_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(45549696))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(46368960))))[name = string("layers_2_self_attn_v_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1280]> layers_2_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_2_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(46369088)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_700_cast_fp16 = conv(bias = layers_2_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_700_dilations_0, groups = var_700_groups_0, pad = var_700_pad_0, pad_type = var_700_pad_type_0, strides = var_700_strides_0, weight = layers_2_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_9_cast_fp16)[name = string("op_700_cast_fp16")];
+            string var_706_pad_type_0 = const()[name = string("op_706_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_706_strides_0 = const()[name = string("op_706_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_706_pad_0 = const()[name = string("op_706_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_706_dilations_0 = const()[name = string("op_706_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_706_groups_0 = const()[name = string("op_706_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_2_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(46410240))), nonzero_data = tensor<fp16, [19205]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(46371712))))[name = string("layers_2_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_706_cast_fp16 = conv(dilations = var_706_dilations_0, groups = var_706_groups_0, pad = var_706_pad_0, pad_type = var_706_pad_type_0, strides = var_706_strides_0, weight = layers_2_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_9_cast_fp16)[name = string("op_706_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> value_5_cast_fp16 = add(x = var_700_cast_fp16, y = var_706_cast_fp16)[name = string("value_5_cast_fp16")];
+            tensor<int32, [4]> var_709 = const()[name = string("op_709"), val = tensor<int32, [4]>([1, 20, 64, -1])];
+            tensor<fp16, [1, 20, 64, 1500]> mh_q_5_cast_fp16 = reshape(shape = var_709, x = query_5_cast_fp16)[name = string("mh_q_5_cast_fp16")];
+            fp16 var_711_to_fp16 = const()[name = string("op_711_to_fp16"), val = fp16(0x1p-3)];
+            tensor<fp16, [1, 20, 64, 1500]> var_712_cast_fp16 = mul(x = mh_q_5_cast_fp16, y = var_711_to_fp16)[name = string("op_712_cast_fp16")];
+            tensor<int32, [4]> var_713 = const()[name = string("op_713"), val = tensor<int32, [4]>([1, 20, 64, -1])];
+            tensor<fp16, [1, 20, 64, 1500]> var_714_cast_fp16 = reshape(shape = var_713, x = key_5_cast_fp16)[name = string("op_714_cast_fp16")];
+            bool mh_w_5_transpose_x_0 = const()[name = string("mh_w_5_transpose_x_0"), val = bool(true)];
+            bool mh_w_5_transpose_y_0 = const()[name = string("mh_w_5_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, 1500, 1500]> mh_w_5_cast_fp16 = matmul(transpose_x = mh_w_5_transpose_x_0, transpose_y = mh_w_5_transpose_y_0, x = var_712_cast_fp16, y = var_714_cast_fp16)[name = string("mh_w_5_cast_fp16")];
+            tensor<fp16, [1, 20, 1500, 1500]> var_717_cast_fp16 = softmax(axis = var_628, x = mh_w_5_cast_fp16)[name = string("op_717_cast_fp16")];
+            tensor<int32, [4]> var_718 = const()[name = string("op_718"), val = tensor<int32, [4]>([1, 20, 64, -1])];
+            tensor<fp16, [1, 20, 64, 1500]> var_719_cast_fp16 = reshape(shape = var_718, x = value_5_cast_fp16)[name = string("op_719_cast_fp16")];
+            bool attn_5_transpose_x_0 = const()[name = string("attn_5_transpose_x_0"), val = bool(false)];
+            bool attn_5_transpose_y_0 = const()[name = string("attn_5_transpose_y_0"), val = bool(true)];
+            tensor<fp16, [1, 20, 64, 1500]> attn_5_cast_fp16 = matmul(transpose_x = attn_5_transpose_x_0, transpose_y = attn_5_transpose_y_0, x = var_719_cast_fp16, y = var_717_cast_fp16)[name = string("attn_5_cast_fp16")];
+            tensor<int32, [4]> var_722 = const()[name = string("op_722"), val = tensor<int32, [4]>([1, 1280, 1, -1])];
+            tensor<fp16, [1, 1280, 1, 1500]> input_17_cast_fp16 = reshape(shape = var_722, x = attn_5_cast_fp16)[name = string("input_17_cast_fp16")];
+            string var_732_pad_type_0 = const()[name = string("op_732_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_732_strides_0 = const()[name = string("op_732_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_732_pad_0 = const()[name = string("op_732_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_732_dilations_0 = const()[name = string("op_732_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_732_groups_0 = const()[name = string("op_732_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_2_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(46615104))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(47434368))))[name = string("layers_2_self_attn_o_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1280]> layers_2_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_2_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(47434496)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_732_cast_fp16 = conv(bias = layers_2_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_732_dilations_0, groups = var_732_groups_0, pad = var_732_pad_0, pad_type = var_732_pad_type_0, strides = var_732_strides_0, weight = layers_2_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_17_cast_fp16)[name = string("op_732_cast_fp16")];
+            string var_738_pad_type_0 = const()[name = string("op_738_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_738_strides_0 = const()[name = string("op_738_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_738_pad_0 = const()[name = string("op_738_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_738_dilations_0 = const()[name = string("op_738_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_738_groups_0 = const()[name = string("op_738_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_2_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(47467328))), nonzero_data = tensor<fp16, [15072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(47437120))))[name = string("layers_2_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_738_cast_fp16 = conv(dilations = var_738_dilations_0, groups = var_738_groups_0, pad = var_738_pad_0, pad_type = var_738_pad_type_0, strides = var_738_strides_0, weight = layers_2_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_17_cast_fp16)[name = string("op_738_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> obj_11_cast_fp16 = add(x = var_732_cast_fp16, y = var_738_cast_fp16)[name = string("obj_11_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_11_cast_fp16 = add(x = inputs_9_cast_fp16, y = obj_11_cast_fp16)[name = string("inputs_11_cast_fp16")];
+            tensor<int32, [1]> out_11_axes_0 = const()[name = string("out_11_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_749_to_fp16 = const()[name = string("op_749_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> out_11_cast_fp16 = layer_norm(axes = out_11_axes_0, epsilon = var_749_to_fp16, x = inputs_11_cast_fp16)[name = string("out_11_cast_fp16")];
+            tensor<fp16, [1280]> input_19_gamma_0_to_fp16 = const()[name = string("input_19_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(47672192)))];
+            tensor<fp16, [1280]> input_19_beta_0_to_fp16 = const()[name = string("input_19_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(47674816)))];
+            fp16 input_19_epsilon_0_to_fp16 = const()[name = string("input_19_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_19_cast_fp16 = batch_norm(beta = input_19_beta_0_to_fp16, epsilon = input_19_epsilon_0_to_fp16, gamma = input_19_gamma_0_to_fp16, mean = var_105_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_11_cast_fp16)[name = string("input_19_cast_fp16")];
+            string var_767_pad_type_0 = const()[name = string("op_767_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_767_strides_0 = const()[name = string("op_767_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_767_pad_0 = const()[name = string("op_767_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_767_dilations_0 = const()[name = string("op_767_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_767_groups_0 = const()[name = string("op_767_groups_0"), val = int32(1)];
+            tensor<fp16, [5120, 1280, 1, 1]> layers_2_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [5120, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(47677440))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(50954304))))[name = string("layers_2_fc1_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [5120]> layers_2_fc1_inlier_module_bias_to_fp16 = const()[name = string("layers_2_fc1_inlier_module_bias_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(50954432)))];
+            tensor<fp16, [1, 5120, 1, 1500]> var_767_cast_fp16 = conv(bias = layers_2_fc1_inlier_module_bias_to_fp16, dilations = var_767_dilations_0, groups = var_767_groups_0, pad = var_767_pad_0, pad_type = var_767_pad_type_0, strides = var_767_strides_0, weight = layers_2_fc1_inlier_module_weight_to_fp16_palettized, x = input_19_cast_fp16)[name = string("op_767_cast_fp16")];
+            string var_773_pad_type_0 = const()[name = string("op_773_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_773_strides_0 = const()[name = string("op_773_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_773_pad_0 = const()[name = string("op_773_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_773_dilations_0 = const()[name = string("op_773_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_773_groups_0 = const()[name = string("op_773_groups_0"), val = int32(1)];
+            tensor<fp16, [5120, 1280, 1, 1]> layers_2_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [5120, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(51003008))), nonzero_data = tensor<fp16, [19084]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(50964736))))[name = string("layers_2_fc1_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 5120, 1, 1500]> var_773_cast_fp16 = conv(dilations = var_773_dilations_0, groups = var_773_groups_0, pad = var_773_pad_0, pad_type = var_773_pad_type_0, strides = var_773_strides_0, weight = layers_2_fc1_outlier_module_weight_to_fp16_sparsified, x = input_19_cast_fp16)[name = string("op_773_cast_fp16")];
+            tensor<fp16, [1, 5120, 1, 1500]> input_21_cast_fp16 = add(x = var_767_cast_fp16, y = var_773_cast_fp16)[name = string("input_21_cast_fp16")];
+            string input_23_mode_0 = const()[name = string("input_23_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 5120, 1, 1500]> input_23_cast_fp16 = gelu(mode = input_23_mode_0, x = input_21_cast_fp16)[name = string("input_23_cast_fp16")];
+            string var_784_pad_type_0 = const()[name = string("op_784_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_784_strides_0 = const()[name = string("op_784_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_784_pad_0 = const()[name = string("op_784_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_784_dilations_0 = const()[name = string("op_784_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_784_groups_0 = const()[name = string("op_784_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 5120, 1, 1]> layers_2_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 5120, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(51822272))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(55099136))))[name = string("layers_2_fc2_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1280]> layers_2_fc2_inlier_module_bias_to_fp16 = const()[name = string("layers_2_fc2_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(55099264)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_784_cast_fp16 = conv(bias = layers_2_fc2_inlier_module_bias_to_fp16, dilations = var_784_dilations_0, groups = var_784_groups_0, pad = var_784_pad_0, pad_type = var_784_pad_type_0, strides = var_784_strides_0, weight = layers_2_fc2_inlier_module_weight_to_fp16_palettized, x = input_23_cast_fp16)[name = string("op_784_cast_fp16")];
+            string var_790_pad_type_0 = const()[name = string("op_790_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_790_strides_0 = const()[name = string("op_790_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_790_pad_0 = const()[name = string("op_790_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_790_dilations_0 = const()[name = string("op_790_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_790_groups_0 = const()[name = string("op_790_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 5120, 1, 1]> layers_2_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 5120, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(55315392))), nonzero_data = tensor<fp16, [106720]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(55101888))))[name = string("layers_2_fc2_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_790_cast_fp16 = conv(dilations = var_790_dilations_0, groups = var_790_groups_0, pad = var_790_pad_0, pad_type = var_790_pad_type_0, strides = var_790_strides_0, weight = layers_2_fc2_outlier_module_weight_to_fp16_sparsified, x = input_23_cast_fp16)[name = string("op_790_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> hidden_states_9_cast_fp16 = add(x = var_784_cast_fp16, y = var_790_cast_fp16)[name = string("hidden_states_9_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_13_cast_fp16 = add(x = inputs_11_cast_fp16, y = hidden_states_9_cast_fp16)[name = string("inputs_13_cast_fp16")];
+            int32 var_800 = const()[name = string("op_800"), val = int32(3)];
+            tensor<int32, [1]> out_13_axes_0 = const()[name = string("out_13_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_819_to_fp16 = const()[name = string("op_819_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> out_13_cast_fp16 = layer_norm(axes = out_13_axes_0, epsilon = var_819_to_fp16, x = inputs_13_cast_fp16)[name = string("out_13_cast_fp16")];
+            tensor<fp16, [1280]> obj_13_gamma_0_to_fp16 = const()[name = string("obj_13_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(56134656)))];
+            tensor<fp16, [1280]> obj_13_beta_0_to_fp16 = const()[name = string("obj_13_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(56137280)))];
+            fp16 obj_13_epsilon_0_to_fp16 = const()[name = string("obj_13_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> obj_13_cast_fp16 = batch_norm(beta = obj_13_beta_0_to_fp16, epsilon = obj_13_epsilon_0_to_fp16, gamma = obj_13_gamma_0_to_fp16, mean = var_105_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_13_cast_fp16)[name = string("obj_13_cast_fp16")];
+            string var_841_pad_type_0 = const()[name = string("op_841_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_841_strides_0 = const()[name = string("op_841_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_841_pad_0 = const()[name = string("op_841_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_841_dilations_0 = const()[name = string("op_841_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_841_groups_0 = const()[name = string("op_841_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_3_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(56139904))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(56959168))))[name = string("layers_3_self_attn_q_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1280]> layers_3_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_3_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(56959296)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_841_cast_fp16 = conv(bias = layers_3_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_841_dilations_0, groups = var_841_groups_0, pad = var_841_pad_0, pad_type = var_841_pad_type_0, strides = var_841_strides_0, weight = layers_3_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_13_cast_fp16)[name = string("op_841_cast_fp16")];
+            string var_847_pad_type_0 = const()[name = string("op_847_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_847_strides_0 = const()[name = string("op_847_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_847_pad_0 = const()[name = string("op_847_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_847_dilations_0 = const()[name = string("op_847_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_847_groups_0 = const()[name = string("op_847_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_3_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(57013184))), nonzero_data = tensor<fp16, [25575]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(56961920))))[name = string("layers_3_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_847_cast_fp16 = conv(dilations = var_847_dilations_0, groups = var_847_groups_0, pad = var_847_pad_0, pad_type = var_847_pad_type_0, strides = var_847_strides_0, weight = layers_3_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_13_cast_fp16)[name = string("op_847_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> query_7_cast_fp16 = add(x = var_841_cast_fp16, y = var_847_cast_fp16)[name = string("query_7_cast_fp16")];
+            string var_856_pad_type_0 = const()[name = string("op_856_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_856_strides_0 = const()[name = string("op_856_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_856_pad_0 = const()[name = string("op_856_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_856_dilations_0 = const()[name = string("op_856_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_856_groups_0 = const()[name = string("op_856_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_3_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(57218048))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(58037312))))[name = string("layers_3_self_attn_k_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_856_cast_fp16 = conv(dilations = var_856_dilations_0, groups = var_856_groups_0, pad = var_856_pad_0, pad_type = var_856_pad_type_0, strides = var_856_strides_0, weight = layers_3_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_13_cast_fp16)[name = string("op_856_cast_fp16")];
+            string var_862_pad_type_0 = const()[name = string("op_862_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_862_strides_0 = const()[name = string("op_862_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_862_pad_0 = const()[name = string("op_862_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_862_dilations_0 = const()[name = string("op_862_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_862_groups_0 = const()[name = string("op_862_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_3_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(58077888))), nonzero_data = tensor<fp16, [20191]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(58037440))))[name = string("layers_3_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_862_cast_fp16 = conv(dilations = var_862_dilations_0, groups = var_862_groups_0, pad = var_862_pad_0, pad_type = var_862_pad_type_0, strides = var_862_strides_0, weight = layers_3_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_13_cast_fp16)[name = string("op_862_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> key_7_cast_fp16 = add(x = var_856_cast_fp16, y = var_862_cast_fp16)[name = string("key_7_cast_fp16")];
+            string var_872_pad_type_0 = const()[name = string("op_872_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_872_strides_0 = const()[name = string("op_872_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_872_pad_0 = const()[name = string("op_872_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_872_dilations_0 = const()[name = string("op_872_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_872_groups_0 = const()[name = string("op_872_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_3_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(58282752))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(59102016))))[name = string("layers_3_self_attn_v_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1280]> layers_3_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_3_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(59102144)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_872_cast_fp16 = conv(bias = layers_3_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_872_dilations_0, groups = var_872_groups_0, pad = var_872_pad_0, pad_type = var_872_pad_type_0, strides = var_872_strides_0, weight = layers_3_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_13_cast_fp16)[name = string("op_872_cast_fp16")];
+            string var_878_pad_type_0 = const()[name = string("op_878_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_878_strides_0 = const()[name = string("op_878_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_878_pad_0 = const()[name = string("op_878_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_878_dilations_0 = const()[name = string("op_878_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_878_groups_0 = const()[name = string("op_878_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_3_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(59138432))), nonzero_data = tensor<fp16, [16773]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(59104768))))[name = string("layers_3_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_878_cast_fp16 = conv(dilations = var_878_dilations_0, groups = var_878_groups_0, pad = var_878_pad_0, pad_type = var_878_pad_type_0, strides = var_878_strides_0, weight = layers_3_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_13_cast_fp16)[name = string("op_878_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> value_7_cast_fp16 = add(x = var_872_cast_fp16, y = var_878_cast_fp16)[name = string("value_7_cast_fp16")];
+            tensor<int32, [4]> var_881 = const()[name = string("op_881"), val = tensor<int32, [4]>([1, 20, 64, -1])];
+            tensor<fp16, [1, 20, 64, 1500]> mh_q_7_cast_fp16 = reshape(shape = var_881, x = query_7_cast_fp16)[name = string("mh_q_7_cast_fp16")];
+            fp16 var_883_to_fp16 = const()[name = string("op_883_to_fp16"), val = fp16(0x1p-3)];
+            tensor<fp16, [1, 20, 64, 1500]> var_884_cast_fp16 = mul(x = mh_q_7_cast_fp16, y = var_883_to_fp16)[name = string("op_884_cast_fp16")];
+            tensor<int32, [4]> var_885 = const()[name = string("op_885"), val = tensor<int32, [4]>([1, 20, 64, -1])];
+            tensor<fp16, [1, 20, 64, 1500]> var_886_cast_fp16 = reshape(shape = var_885, x = key_7_cast_fp16)[name = string("op_886_cast_fp16")];
+            bool mh_w_7_transpose_x_0 = const()[name = string("mh_w_7_transpose_x_0"), val = bool(true)];
+            bool mh_w_7_transpose_y_0 = const()[name = string("mh_w_7_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, 1500, 1500]> mh_w_7_cast_fp16 = matmul(transpose_x = mh_w_7_transpose_x_0, transpose_y = mh_w_7_transpose_y_0, x = var_884_cast_fp16, y = var_886_cast_fp16)[name = string("mh_w_7_cast_fp16")];
+            tensor<fp16, [1, 20, 1500, 1500]> var_889_cast_fp16 = softmax(axis = var_800, x = mh_w_7_cast_fp16)[name = string("op_889_cast_fp16")];
+            tensor<int32, [4]> var_890 = const()[name = string("op_890"), val = tensor<int32, [4]>([1, 20, 64, -1])];
+            tensor<fp16, [1, 20, 64, 1500]> var_891_cast_fp16 = reshape(shape = var_890, x = value_7_cast_fp16)[name = string("op_891_cast_fp16")];
+            bool attn_7_transpose_x_0 = const()[name = string("attn_7_transpose_x_0"), val = bool(false)];
+            bool attn_7_transpose_y_0 = const()[name = string("attn_7_transpose_y_0"), val = bool(true)];
+            tensor<fp16, [1, 20, 64, 1500]> attn_7_cast_fp16 = matmul(transpose_x = attn_7_transpose_x_0, transpose_y = attn_7_transpose_y_0, x = var_891_cast_fp16, y = var_889_cast_fp16)[name = string("attn_7_cast_fp16")];
+            tensor<int32, [4]> var_894 = const()[name = string("op_894"), val = tensor<int32, [4]>([1, 1280, 1, -1])];
+            tensor<fp16, [1, 1280, 1, 1500]> input_25_cast_fp16 = reshape(shape = var_894, x = attn_7_cast_fp16)[name = string("input_25_cast_fp16")];
+            string var_904_pad_type_0 = const()[name = string("op_904_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_904_strides_0 = const()[name = string("op_904_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_904_pad_0 = const()[name = string("op_904_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_904_dilations_0 = const()[name = string("op_904_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_904_groups_0 = const()[name = string("op_904_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_3_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(59343296))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(60162560))))[name = string("layers_3_self_attn_o_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1280]> layers_3_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_3_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(60162688)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_904_cast_fp16 = conv(bias = layers_3_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_904_dilations_0, groups = var_904_groups_0, pad = var_904_pad_0, pad_type = var_904_pad_type_0, strides = var_904_strides_0, weight = layers_3_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_25_cast_fp16)[name = string("op_904_cast_fp16")];
+            string var_910_pad_type_0 = const()[name = string("op_910_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_910_strides_0 = const()[name = string("op_910_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_910_pad_0 = const()[name = string("op_910_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_910_dilations_0 = const()[name = string("op_910_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_910_groups_0 = const()[name = string("op_910_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_3_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(60188096))), nonzero_data = tensor<fp16, [11350]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(60165312))))[name = string("layers_3_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_910_cast_fp16 = conv(dilations = var_910_dilations_0, groups = var_910_groups_0, pad = var_910_pad_0, pad_type = var_910_pad_type_0, strides = var_910_strides_0, weight = layers_3_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_25_cast_fp16)[name = string("op_910_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> obj_15_cast_fp16 = add(x = var_904_cast_fp16, y = var_910_cast_fp16)[name = string("obj_15_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_15_cast_fp16 = add(x = inputs_13_cast_fp16, y = obj_15_cast_fp16)[name = string("inputs_15_cast_fp16")];
+            tensor<int32, [1]> out_15_axes_0 = const()[name = string("out_15_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_921_to_fp16 = const()[name = string("op_921_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> out_15_cast_fp16 = layer_norm(axes = out_15_axes_0, epsilon = var_921_to_fp16, x = inputs_15_cast_fp16)[name = string("out_15_cast_fp16")];
+            tensor<fp16, [1280]> input_27_gamma_0_to_fp16 = const()[name = string("input_27_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(60392960)))];
+            tensor<fp16, [1280]> input_27_beta_0_to_fp16 = const()[name = string("input_27_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(60395584)))];
+            fp16 input_27_epsilon_0_to_fp16 = const()[name = string("input_27_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_27_cast_fp16 = batch_norm(beta = input_27_beta_0_to_fp16, epsilon = input_27_epsilon_0_to_fp16, gamma = input_27_gamma_0_to_fp16, mean = var_105_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_15_cast_fp16)[name = string("input_27_cast_fp16")];
+            string var_939_pad_type_0 = const()[name = string("op_939_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_939_strides_0 = const()[name = string("op_939_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_939_pad_0 = const()[name = string("op_939_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_939_dilations_0 = const()[name = string("op_939_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_939_groups_0 = const()[name = string("op_939_groups_0"), val = int32(1)];
+            tensor<fp16, [5120, 1280, 1, 1]> layers_3_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [5120, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(60398208))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(63675072))))[name = string("layers_3_fc1_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [5120]> layers_3_fc1_inlier_module_bias_to_fp16 = const()[name = string("layers_3_fc1_inlier_module_bias_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(63675200)))];
+            tensor<fp16, [1, 5120, 1, 1500]> var_939_cast_fp16 = conv(bias = layers_3_fc1_inlier_module_bias_to_fp16, dilations = var_939_dilations_0, groups = var_939_groups_0, pad = var_939_pad_0, pad_type = var_939_pad_type_0, strides = var_939_strides_0, weight = layers_3_fc1_inlier_module_weight_to_fp16_palettized, x = input_27_cast_fp16)[name = string("op_939_cast_fp16")];
+            string var_945_pad_type_0 = const()[name = string("op_945_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_945_strides_0 = const()[name = string("op_945_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_945_pad_0 = const()[name = string("op_945_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_945_dilations_0 = const()[name = string("op_945_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_945_groups_0 = const()[name = string("op_945_groups_0"), val = int32(1)];
+            tensor<fp16, [5120, 1280, 1, 1]> layers_3_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [5120, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(63740864))), nonzero_data = tensor<fp16, [27622]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(63685504))))[name = string("layers_3_fc1_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 5120, 1, 1500]> var_945_cast_fp16 = conv(dilations = var_945_dilations_0, groups = var_945_groups_0, pad = var_945_pad_0, pad_type = var_945_pad_type_0, strides = var_945_strides_0, weight = layers_3_fc1_outlier_module_weight_to_fp16_sparsified, x = input_27_cast_fp16)[name = string("op_945_cast_fp16")];
+            tensor<fp16, [1, 5120, 1, 1500]> input_29_cast_fp16 = add(x = var_939_cast_fp16, y = var_945_cast_fp16)[name = string("input_29_cast_fp16")];
+            string input_31_mode_0 = const()[name = string("input_31_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 5120, 1, 1500]> input_31_cast_fp16 = gelu(mode = input_31_mode_0, x = input_29_cast_fp16)[name = string("input_31_cast_fp16")];
+            string var_956_pad_type_0 = const()[name = string("op_956_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_956_strides_0 = const()[name = string("op_956_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_956_pad_0 = const()[name = string("op_956_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_956_dilations_0 = const()[name = string("op_956_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_956_groups_0 = const()[name = string("op_956_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 5120, 1, 1]> layers_3_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 5120, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64560128))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(67836992))))[name = string("layers_3_fc2_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1280]> layers_3_fc2_inlier_module_bias_to_fp16 = const()[name = string("layers_3_fc2_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(67837120)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_956_cast_fp16 = conv(bias = layers_3_fc2_inlier_module_bias_to_fp16, dilations = var_956_dilations_0, groups = var_956_groups_0, pad = var_956_pad_0, pad_type = var_956_pad_type_0, strides = var_956_strides_0, weight = layers_3_fc2_inlier_module_weight_to_fp16_palettized, x = input_31_cast_fp16)[name = string("op_956_cast_fp16")];
+            string var_962_pad_type_0 = const()[name = string("op_962_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_962_strides_0 = const()[name = string("op_962_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_962_pad_0 = const()[name = string("op_962_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_962_dilations_0 = const()[name = string("op_962_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_962_groups_0 = const()[name = string("op_962_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 5120, 1, 1]> layers_3_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 5120, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(68064960))), nonzero_data = tensor<fp16, [112545]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(67839744))))[name = string("layers_3_fc2_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_962_cast_fp16 = conv(dilations = var_962_dilations_0, groups = var_962_groups_0, pad = var_962_pad_0, pad_type = var_962_pad_type_0, strides = var_962_strides_0, weight = layers_3_fc2_outlier_module_weight_to_fp16_sparsified, x = input_31_cast_fp16)[name = string("op_962_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> hidden_states_11_cast_fp16 = add(x = var_956_cast_fp16, y = var_962_cast_fp16)[name = string("hidden_states_11_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_17_cast_fp16 = add(x = inputs_15_cast_fp16, y = hidden_states_11_cast_fp16)[name = string("inputs_17_cast_fp16")];
+            int32 var_972 = const()[name = string("op_972"), val = int32(3)];
+            tensor<int32, [1]> out_17_axes_0 = const()[name = string("out_17_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_991_to_fp16 = const()[name = string("op_991_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> out_17_cast_fp16 = layer_norm(axes = out_17_axes_0, epsilon = var_991_to_fp16, x = inputs_17_cast_fp16)[name = string("out_17_cast_fp16")];
+            tensor<fp16, [1280]> obj_17_gamma_0_to_fp16 = const()[name = string("obj_17_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(68884224)))];
+            tensor<fp16, [1280]> obj_17_beta_0_to_fp16 = const()[name = string("obj_17_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(68886848)))];
+            fp16 obj_17_epsilon_0_to_fp16 = const()[name = string("obj_17_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> obj_17_cast_fp16 = batch_norm(beta = obj_17_beta_0_to_fp16, epsilon = obj_17_epsilon_0_to_fp16, gamma = obj_17_gamma_0_to_fp16, mean = var_105_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_17_cast_fp16)[name = string("obj_17_cast_fp16")];
+            string var_1013_pad_type_0 = const()[name = string("op_1013_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1013_strides_0 = const()[name = string("op_1013_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1013_pad_0 = const()[name = string("op_1013_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1013_dilations_0 = const()[name = string("op_1013_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1013_groups_0 = const()[name = string("op_1013_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_4_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(68889472))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(69708736))))[name = string("layers_4_self_attn_q_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1280]> layers_4_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_4_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(69708864)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_1013_cast_fp16 = conv(bias = layers_4_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_1013_dilations_0, groups = var_1013_groups_0, pad = var_1013_pad_0, pad_type = var_1013_pad_type_0, strides = var_1013_strides_0, weight = layers_4_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_17_cast_fp16)[name = string("op_1013_cast_fp16")];
+            string var_1019_pad_type_0 = const()[name = string("op_1019_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1019_strides_0 = const()[name = string("op_1019_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1019_pad_0 = const()[name = string("op_1019_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1019_dilations_0 = const()[name = string("op_1019_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1019_groups_0 = const()[name = string("op_1019_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_4_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(69759808))), nonzero_data = tensor<fp16, [24120]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(69711488))))[name = string("layers_4_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_1019_cast_fp16 = conv(dilations = var_1019_dilations_0, groups = var_1019_groups_0, pad = var_1019_pad_0, pad_type = var_1019_pad_type_0, strides = var_1019_strides_0, weight = layers_4_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_17_cast_fp16)[name = string("op_1019_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> query_9_cast_fp16 = add(x = var_1013_cast_fp16, y = var_1019_cast_fp16)[name = string("query_9_cast_fp16")];
+            string var_1028_pad_type_0 = const()[name = string("op_1028_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1028_strides_0 = const()[name = string("op_1028_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1028_pad_0 = const()[name = string("op_1028_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1028_dilations_0 = const()[name = string("op_1028_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1028_groups_0 = const()[name = string("op_1028_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_4_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(69964672))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(70783936))))[name = string("layers_4_self_attn_k_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_1028_cast_fp16 = conv(dilations = var_1028_dilations_0, groups = var_1028_groups_0, pad = var_1028_pad_0, pad_type = var_1028_pad_type_0, strides = var_1028_strides_0, weight = layers_4_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_17_cast_fp16)[name = string("op_1028_cast_fp16")];
+            string var_1034_pad_type_0 = const()[name = string("op_1034_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1034_strides_0 = const()[name = string("op_1034_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1034_pad_0 = const()[name = string("op_1034_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1034_dilations_0 = const()[name = string("op_1034_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1034_groups_0 = const()[name = string("op_1034_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_4_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(70823680))), nonzero_data = tensor<fp16, [19761]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(70784064))))[name = string("layers_4_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_1034_cast_fp16 = conv(dilations = var_1034_dilations_0, groups = var_1034_groups_0, pad = var_1034_pad_0, pad_type = var_1034_pad_type_0, strides = var_1034_strides_0, weight = layers_4_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_17_cast_fp16)[name = string("op_1034_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> key_9_cast_fp16 = add(x = var_1028_cast_fp16, y = var_1034_cast_fp16)[name = string("key_9_cast_fp16")];
+            string var_1044_pad_type_0 = const()[name = string("op_1044_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1044_strides_0 = const()[name = string("op_1044_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1044_pad_0 = const()[name = string("op_1044_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1044_dilations_0 = const()[name = string("op_1044_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1044_groups_0 = const()[name = string("op_1044_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_4_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(71028544))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(71847808))))[name = string("layers_4_self_attn_v_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1280]> layers_4_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_4_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(71847936)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_1044_cast_fp16 = conv(bias = layers_4_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_1044_dilations_0, groups = var_1044_groups_0, pad = var_1044_pad_0, pad_type = var_1044_pad_type_0, strides = var_1044_strides_0, weight = layers_4_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_17_cast_fp16)[name = string("op_1044_cast_fp16")];
+            string var_1050_pad_type_0 = const()[name = string("op_1050_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1050_strides_0 = const()[name = string("op_1050_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1050_pad_0 = const()[name = string("op_1050_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1050_dilations_0 = const()[name = string("op_1050_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1050_groups_0 = const()[name = string("op_1050_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_4_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(71883776))), nonzero_data = tensor<fp16, [16547]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(71850560))))[name = string("layers_4_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_1050_cast_fp16 = conv(dilations = var_1050_dilations_0, groups = var_1050_groups_0, pad = var_1050_pad_0, pad_type = var_1050_pad_type_0, strides = var_1050_strides_0, weight = layers_4_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_17_cast_fp16)[name = string("op_1050_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> value_9_cast_fp16 = add(x = var_1044_cast_fp16, y = var_1050_cast_fp16)[name = string("value_9_cast_fp16")];
+            tensor<int32, [4]> var_1053 = const()[name = string("op_1053"), val = tensor<int32, [4]>([1, 20, 64, -1])];
+            tensor<fp16, [1, 20, 64, 1500]> mh_q_9_cast_fp16 = reshape(shape = var_1053, x = query_9_cast_fp16)[name = string("mh_q_9_cast_fp16")];
+            fp16 var_1055_to_fp16 = const()[name = string("op_1055_to_fp16"), val = fp16(0x1p-3)];
+            tensor<fp16, [1, 20, 64, 1500]> var_1056_cast_fp16 = mul(x = mh_q_9_cast_fp16, y = var_1055_to_fp16)[name = string("op_1056_cast_fp16")];
+            tensor<int32, [4]> var_1057 = const()[name = string("op_1057"), val = tensor<int32, [4]>([1, 20, 64, -1])];
+            tensor<fp16, [1, 20, 64, 1500]> var_1058_cast_fp16 = reshape(shape = var_1057, x = key_9_cast_fp16)[name = string("op_1058_cast_fp16")];
+            bool mh_w_9_transpose_x_0 = const()[name = string("mh_w_9_transpose_x_0"), val = bool(true)];
+            bool mh_w_9_transpose_y_0 = const()[name = string("mh_w_9_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, 1500, 1500]> mh_w_9_cast_fp16 = matmul(transpose_x = mh_w_9_transpose_x_0, transpose_y = mh_w_9_transpose_y_0, x = var_1056_cast_fp16, y = var_1058_cast_fp16)[name = string("mh_w_9_cast_fp16")];
+            tensor<fp16, [1, 20, 1500, 1500]> var_1061_cast_fp16 = softmax(axis = var_972, x = mh_w_9_cast_fp16)[name = string("op_1061_cast_fp16")];
+            tensor<int32, [4]> var_1062 = const()[name = string("op_1062"), val = tensor<int32, [4]>([1, 20, 64, -1])];
+            tensor<fp16, [1, 20, 64, 1500]> var_1063_cast_fp16 = reshape(shape = var_1062, x = value_9_cast_fp16)[name = string("op_1063_cast_fp16")];
+            bool attn_9_transpose_x_0 = const()[name = string("attn_9_transpose_x_0"), val = bool(false)];
+            bool attn_9_transpose_y_0 = const()[name = string("attn_9_transpose_y_0"), val = bool(true)];
+            tensor<fp16, [1, 20, 64, 1500]> attn_9_cast_fp16 = matmul(transpose_x = attn_9_transpose_x_0, transpose_y = attn_9_transpose_y_0, x = var_1063_cast_fp16, y = var_1061_cast_fp16)[name = string("attn_9_cast_fp16")];
+            tensor<int32, [4]> var_1066 = const()[name = string("op_1066"), val = tensor<int32, [4]>([1, 1280, 1, -1])];
+            tensor<fp16, [1, 1280, 1, 1500]> input_33_cast_fp16 = reshape(shape = var_1066, x = attn_9_cast_fp16)[name = string("input_33_cast_fp16")];
+            string var_1076_pad_type_0 = const()[name = string("op_1076_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1076_strides_0 = const()[name = string("op_1076_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1076_pad_0 = const()[name = string("op_1076_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1076_dilations_0 = const()[name = string("op_1076_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1076_groups_0 = const()[name = string("op_1076_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_4_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(72088640))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(72907904))))[name = string("layers_4_self_attn_o_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1280]> layers_4_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_4_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(72908032)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_1076_cast_fp16 = conv(bias = layers_4_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_1076_dilations_0, groups = var_1076_groups_0, pad = var_1076_pad_0, pad_type = var_1076_pad_type_0, strides = var_1076_strides_0, weight = layers_4_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_33_cast_fp16)[name = string("op_1076_cast_fp16")];
+            string var_1082_pad_type_0 = const()[name = string("op_1082_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1082_strides_0 = const()[name = string("op_1082_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1082_pad_0 = const()[name = string("op_1082_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1082_dilations_0 = const()[name = string("op_1082_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1082_groups_0 = const()[name = string("op_1082_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_4_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(72933952))), nonzero_data = tensor<fp16, [11593]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(72910656))))[name = string("layers_4_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_1082_cast_fp16 = conv(dilations = var_1082_dilations_0, groups = var_1082_groups_0, pad = var_1082_pad_0, pad_type = var_1082_pad_type_0, strides = var_1082_strides_0, weight = layers_4_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_33_cast_fp16)[name = string("op_1082_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> obj_19_cast_fp16 = add(x = var_1076_cast_fp16, y = var_1082_cast_fp16)[name = string("obj_19_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_19_cast_fp16 = add(x = inputs_17_cast_fp16, y = obj_19_cast_fp16)[name = string("inputs_19_cast_fp16")];
+            tensor<int32, [1]> out_19_axes_0 = const()[name = string("out_19_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_1093_to_fp16 = const()[name = string("op_1093_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> out_19_cast_fp16 = layer_norm(axes = out_19_axes_0, epsilon = var_1093_to_fp16, x = inputs_19_cast_fp16)[name = string("out_19_cast_fp16")];
+            tensor<fp16, [1280]> input_35_gamma_0_to_fp16 = const()[name = string("input_35_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(73138816)))];
+            tensor<fp16, [1280]> input_35_beta_0_to_fp16 = const()[name = string("input_35_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(73141440)))];
+            fp16 input_35_epsilon_0_to_fp16 = const()[name = string("input_35_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_35_cast_fp16 = batch_norm(beta = input_35_beta_0_to_fp16, epsilon = input_35_epsilon_0_to_fp16, gamma = input_35_gamma_0_to_fp16, mean = var_105_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_19_cast_fp16)[name = string("input_35_cast_fp16")];
+            string var_1111_pad_type_0 = const()[name = string("op_1111_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1111_strides_0 = const()[name = string("op_1111_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1111_pad_0 = const()[name = string("op_1111_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1111_dilations_0 = const()[name = string("op_1111_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1111_groups_0 = const()[name = string("op_1111_groups_0"), val = int32(1)];
+            tensor<fp16, [5120, 1280, 1, 1]> layers_4_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [5120, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(73144064))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(76420928))))[name = string("layers_4_fc1_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [5120]> layers_4_fc1_inlier_module_bias_to_fp16 = const()[name = string("layers_4_fc1_inlier_module_bias_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(76421056)))];
+            tensor<fp16, [1, 5120, 1, 1500]> var_1111_cast_fp16 = conv(bias = layers_4_fc1_inlier_module_bias_to_fp16, dilations = var_1111_dilations_0, groups = var_1111_groups_0, pad = var_1111_pad_0, pad_type = var_1111_pad_type_0, strides = var_1111_strides_0, weight = layers_4_fc1_inlier_module_weight_to_fp16_palettized, x = input_35_cast_fp16)[name = string("op_1111_cast_fp16")];
+            string var_1117_pad_type_0 = const()[name = string("op_1117_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1117_strides_0 = const()[name = string("op_1117_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1117_pad_0 = const()[name = string("op_1117_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1117_dilations_0 = const()[name = string("op_1117_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1117_groups_0 = const()[name = string("op_1117_groups_0"), val = int32(1)];
+            tensor<fp16, [5120, 1280, 1, 1]> layers_4_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [5120, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(76440192))), nonzero_data = tensor<fp16, [4361]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(76431360))))[name = string("layers_4_fc1_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 5120, 1, 1500]> var_1117_cast_fp16 = conv(dilations = var_1117_dilations_0, groups = var_1117_groups_0, pad = var_1117_pad_0, pad_type = var_1117_pad_type_0, strides = var_1117_strides_0, weight = layers_4_fc1_outlier_module_weight_to_fp16_sparsified, x = input_35_cast_fp16)[name = string("op_1117_cast_fp16")];
+            tensor<fp16, [1, 5120, 1, 1500]> input_37_cast_fp16 = add(x = var_1111_cast_fp16, y = var_1117_cast_fp16)[name = string("input_37_cast_fp16")];
+            string input_39_mode_0 = const()[name = string("input_39_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 5120, 1, 1500]> input_39_cast_fp16 = gelu(mode = input_39_mode_0, x = input_37_cast_fp16)[name = string("input_39_cast_fp16")];
+            string var_1128_pad_type_0 = const()[name = string("op_1128_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1128_strides_0 = const()[name = string("op_1128_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1128_pad_0 = const()[name = string("op_1128_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1128_dilations_0 = const()[name = string("op_1128_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1128_groups_0 = const()[name = string("op_1128_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 5120, 1, 1]> layers_4_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 5120, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(77259456))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(80536320))))[name = string("layers_4_fc2_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1280]> layers_4_fc2_inlier_module_bias_to_fp16 = const()[name = string("layers_4_fc2_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(80536448)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_1128_cast_fp16 = conv(bias = layers_4_fc2_inlier_module_bias_to_fp16, dilations = var_1128_dilations_0, groups = var_1128_groups_0, pad = var_1128_pad_0, pad_type = var_1128_pad_type_0, strides = var_1128_strides_0, weight = layers_4_fc2_inlier_module_weight_to_fp16_palettized, x = input_39_cast_fp16)[name = string("op_1128_cast_fp16")];
+            string var_1134_pad_type_0 = const()[name = string("op_1134_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1134_strides_0 = const()[name = string("op_1134_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1134_pad_0 = const()[name = string("op_1134_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1134_dilations_0 = const()[name = string("op_1134_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1134_groups_0 = const()[name = string("op_1134_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 5120, 1, 1]> layers_4_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 5120, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(80778304))), nonzero_data = tensor<fp16, [119569]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(80539072))))[name = string("layers_4_fc2_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_1134_cast_fp16 = conv(dilations = var_1134_dilations_0, groups = var_1134_groups_0, pad = var_1134_pad_0, pad_type = var_1134_pad_type_0, strides = var_1134_strides_0, weight = layers_4_fc2_outlier_module_weight_to_fp16_sparsified, x = input_39_cast_fp16)[name = string("op_1134_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> hidden_states_13_cast_fp16 = add(x = var_1128_cast_fp16, y = var_1134_cast_fp16)[name = string("hidden_states_13_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_21_cast_fp16 = add(x = inputs_19_cast_fp16, y = hidden_states_13_cast_fp16)[name = string("inputs_21_cast_fp16")];
+            int32 var_1144 = const()[name = string("op_1144"), val = int32(3)];
+            tensor<int32, [1]> out_21_axes_0 = const()[name = string("out_21_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_1163_to_fp16 = const()[name = string("op_1163_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> out_21_cast_fp16 = layer_norm(axes = out_21_axes_0, epsilon = var_1163_to_fp16, x = inputs_21_cast_fp16)[name = string("out_21_cast_fp16")];
+            tensor<fp16, [1280]> obj_21_gamma_0_to_fp16 = const()[name = string("obj_21_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(81597568)))];
+            tensor<fp16, [1280]> obj_21_beta_0_to_fp16 = const()[name = string("obj_21_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(81600192)))];
+            fp16 obj_21_epsilon_0_to_fp16 = const()[name = string("obj_21_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> obj_21_cast_fp16 = batch_norm(beta = obj_21_beta_0_to_fp16, epsilon = obj_21_epsilon_0_to_fp16, gamma = obj_21_gamma_0_to_fp16, mean = var_105_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_21_cast_fp16)[name = string("obj_21_cast_fp16")];
+            string var_1185_pad_type_0 = const()[name = string("op_1185_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1185_strides_0 = const()[name = string("op_1185_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1185_pad_0 = const()[name = string("op_1185_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1185_dilations_0 = const()[name = string("op_1185_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1185_groups_0 = const()[name = string("op_1185_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_5_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(81602816))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(82422080))))[name = string("layers_5_self_attn_q_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1280]> layers_5_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_5_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(82422208)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_1185_cast_fp16 = conv(bias = layers_5_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_1185_dilations_0, groups = var_1185_groups_0, pad = var_1185_pad_0, pad_type = var_1185_pad_type_0, strides = var_1185_strides_0, weight = layers_5_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_21_cast_fp16)[name = string("op_1185_cast_fp16")];
+            string var_1191_pad_type_0 = const()[name = string("op_1191_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1191_strides_0 = const()[name = string("op_1191_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1191_pad_0 = const()[name = string("op_1191_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1191_dilations_0 = const()[name = string("op_1191_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1191_groups_0 = const()[name = string("op_1191_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_5_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(82473856))), nonzero_data = tensor<fp16, [24480]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(82424832))))[name = string("layers_5_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_1191_cast_fp16 = conv(dilations = var_1191_dilations_0, groups = var_1191_groups_0, pad = var_1191_pad_0, pad_type = var_1191_pad_type_0, strides = var_1191_strides_0, weight = layers_5_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_21_cast_fp16)[name = string("op_1191_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> query_11_cast_fp16 = add(x = var_1185_cast_fp16, y = var_1191_cast_fp16)[name = string("query_11_cast_fp16")];
+            string var_1200_pad_type_0 = const()[name = string("op_1200_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1200_strides_0 = const()[name = string("op_1200_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1200_pad_0 = const()[name = string("op_1200_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1200_dilations_0 = const()[name = string("op_1200_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1200_groups_0 = const()[name = string("op_1200_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_5_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(82678720))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(83497984))))[name = string("layers_5_self_attn_k_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_1200_cast_fp16 = conv(dilations = var_1200_dilations_0, groups = var_1200_groups_0, pad = var_1200_pad_0, pad_type = var_1200_pad_type_0, strides = var_1200_strides_0, weight = layers_5_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_21_cast_fp16)[name = string("op_1200_cast_fp16")];
+            string var_1206_pad_type_0 = const()[name = string("op_1206_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1206_strides_0 = const()[name = string("op_1206_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1206_pad_0 = const()[name = string("op_1206_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1206_dilations_0 = const()[name = string("op_1206_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1206_groups_0 = const()[name = string("op_1206_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_5_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(83533248))), nonzero_data = tensor<fp16, [17508]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(83498112))))[name = string("layers_5_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_1206_cast_fp16 = conv(dilations = var_1206_dilations_0, groups = var_1206_groups_0, pad = var_1206_pad_0, pad_type = var_1206_pad_type_0, strides = var_1206_strides_0, weight = layers_5_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_21_cast_fp16)[name = string("op_1206_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> key_11_cast_fp16 = add(x = var_1200_cast_fp16, y = var_1206_cast_fp16)[name = string("key_11_cast_fp16")];
+            string var_1216_pad_type_0 = const()[name = string("op_1216_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1216_strides_0 = const()[name = string("op_1216_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1216_pad_0 = const()[name = string("op_1216_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1216_dilations_0 = const()[name = string("op_1216_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1216_groups_0 = const()[name = string("op_1216_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_5_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(83738112))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(84557376))))[name = string("layers_5_self_attn_v_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1280]> layers_5_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_5_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(84557504)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_1216_cast_fp16 = conv(bias = layers_5_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_1216_dilations_0, groups = var_1216_groups_0, pad = var_1216_pad_0, pad_type = var_1216_pad_type_0, strides = var_1216_strides_0, weight = layers_5_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_21_cast_fp16)[name = string("op_1216_cast_fp16")];
+            string var_1222_pad_type_0 = const()[name = string("op_1222_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1222_strides_0 = const()[name = string("op_1222_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1222_pad_0 = const()[name = string("op_1222_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1222_dilations_0 = const()[name = string("op_1222_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1222_groups_0 = const()[name = string("op_1222_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_5_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(84586624))), nonzero_data = tensor<fp16, [13195]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(84560128))))[name = string("layers_5_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_1222_cast_fp16 = conv(dilations = var_1222_dilations_0, groups = var_1222_groups_0, pad = var_1222_pad_0, pad_type = var_1222_pad_type_0, strides = var_1222_strides_0, weight = layers_5_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_21_cast_fp16)[name = string("op_1222_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> value_11_cast_fp16 = add(x = var_1216_cast_fp16, y = var_1222_cast_fp16)[name = string("value_11_cast_fp16")];
+            tensor<int32, [4]> var_1225 = const()[name = string("op_1225"), val = tensor<int32, [4]>([1, 20, 64, -1])];
+            tensor<fp16, [1, 20, 64, 1500]> mh_q_11_cast_fp16 = reshape(shape = var_1225, x = query_11_cast_fp16)[name = string("mh_q_11_cast_fp16")];
+            fp16 var_1227_to_fp16 = const()[name = string("op_1227_to_fp16"), val = fp16(0x1p-3)];
+            tensor<fp16, [1, 20, 64, 1500]> var_1228_cast_fp16 = mul(x = mh_q_11_cast_fp16, y = var_1227_to_fp16)[name = string("op_1228_cast_fp16")];
+            tensor<int32, [4]> var_1229 = const()[name = string("op_1229"), val = tensor<int32, [4]>([1, 20, 64, -1])];
+            tensor<fp16, [1, 20, 64, 1500]> var_1230_cast_fp16 = reshape(shape = var_1229, x = key_11_cast_fp16)[name = string("op_1230_cast_fp16")];
+            bool mh_w_11_transpose_x_0 = const()[name = string("mh_w_11_transpose_x_0"), val = bool(true)];
+            bool mh_w_11_transpose_y_0 = const()[name = string("mh_w_11_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, 1500, 1500]> mh_w_11_cast_fp16 = matmul(transpose_x = mh_w_11_transpose_x_0, transpose_y = mh_w_11_transpose_y_0, x = var_1228_cast_fp16, y = var_1230_cast_fp16)[name = string("mh_w_11_cast_fp16")];
+            tensor<fp16, [1, 20, 1500, 1500]> var_1233_cast_fp16 = softmax(axis = var_1144, x = mh_w_11_cast_fp16)[name = string("op_1233_cast_fp16")];
+            tensor<int32, [4]> var_1234 = const()[name = string("op_1234"), val = tensor<int32, [4]>([1, 20, 64, -1])];
+            tensor<fp16, [1, 20, 64, 1500]> var_1235_cast_fp16 = reshape(shape = var_1234, x = value_11_cast_fp16)[name = string("op_1235_cast_fp16")];
+            bool attn_11_transpose_x_0 = const()[name = string("attn_11_transpose_x_0"), val = bool(false)];
+            bool attn_11_transpose_y_0 = const()[name = string("attn_11_transpose_y_0"), val = bool(true)];
+            tensor<fp16, [1, 20, 64, 1500]> attn_11_cast_fp16 = matmul(transpose_x = attn_11_transpose_x_0, transpose_y = attn_11_transpose_y_0, x = var_1235_cast_fp16, y = var_1233_cast_fp16)[name = string("attn_11_cast_fp16")];
+            tensor<int32, [4]> var_1238 = const()[name = string("op_1238"), val = tensor<int32, [4]>([1, 1280, 1, -1])];
+            tensor<fp16, [1, 1280, 1, 1500]> input_41_cast_fp16 = reshape(shape = var_1238, x = attn_11_cast_fp16)[name = string("input_41_cast_fp16")];
+            string var_1248_pad_type_0 = const()[name = string("op_1248_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1248_strides_0 = const()[name = string("op_1248_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1248_pad_0 = const()[name = string("op_1248_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1248_dilations_0 = const()[name = string("op_1248_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1248_groups_0 = const()[name = string("op_1248_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_5_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(84791488))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(85610752))))[name = string("layers_5_self_attn_o_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1280]> layers_5_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_5_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(85610880)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_1248_cast_fp16 = conv(bias = layers_5_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_1248_dilations_0, groups = var_1248_groups_0, pad = var_1248_pad_0, pad_type = var_1248_pad_type_0, strides = var_1248_strides_0, weight = layers_5_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_41_cast_fp16)[name = string("op_1248_cast_fp16")];
+            string var_1254_pad_type_0 = const()[name = string("op_1254_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1254_strides_0 = const()[name = string("op_1254_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1254_pad_0 = const()[name = string("op_1254_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1254_dilations_0 = const()[name = string("op_1254_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1254_groups_0 = const()[name = string("op_1254_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_5_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(85634944))), nonzero_data = tensor<fp16, [10679]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(85613504))))[name = string("layers_5_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_1254_cast_fp16 = conv(dilations = var_1254_dilations_0, groups = var_1254_groups_0, pad = var_1254_pad_0, pad_type = var_1254_pad_type_0, strides = var_1254_strides_0, weight = layers_5_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_41_cast_fp16)[name = string("op_1254_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> obj_23_cast_fp16 = add(x = var_1248_cast_fp16, y = var_1254_cast_fp16)[name = string("obj_23_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_23_cast_fp16 = add(x = inputs_21_cast_fp16, y = obj_23_cast_fp16)[name = string("inputs_23_cast_fp16")];
+            tensor<int32, [1]> out_23_axes_0 = const()[name = string("out_23_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_1265_to_fp16 = const()[name = string("op_1265_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> out_23_cast_fp16 = layer_norm(axes = out_23_axes_0, epsilon = var_1265_to_fp16, x = inputs_23_cast_fp16)[name = string("out_23_cast_fp16")];
+            tensor<fp16, [1280]> input_43_gamma_0_to_fp16 = const()[name = string("input_43_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(85839808)))];
+            tensor<fp16, [1280]> input_43_beta_0_to_fp16 = const()[name = string("input_43_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(85842432)))];
+            fp16 input_43_epsilon_0_to_fp16 = const()[name = string("input_43_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_43_cast_fp16 = batch_norm(beta = input_43_beta_0_to_fp16, epsilon = input_43_epsilon_0_to_fp16, gamma = input_43_gamma_0_to_fp16, mean = var_105_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_23_cast_fp16)[name = string("input_43_cast_fp16")];
+            string var_1283_pad_type_0 = const()[name = string("op_1283_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1283_strides_0 = const()[name = string("op_1283_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1283_pad_0 = const()[name = string("op_1283_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1283_dilations_0 = const()[name = string("op_1283_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1283_groups_0 = const()[name = string("op_1283_groups_0"), val = int32(1)];
+            tensor<fp16, [5120, 1280, 1, 1]> layers_5_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [5120, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(85845056))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(89121920))))[name = string("layers_5_fc1_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [5120]> layers_5_fc1_inlier_module_bias_to_fp16 = const()[name = string("layers_5_fc1_inlier_module_bias_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(89122048)))];
+            tensor<fp16, [1, 5120, 1, 1500]> var_1283_cast_fp16 = conv(bias = layers_5_fc1_inlier_module_bias_to_fp16, dilations = var_1283_dilations_0, groups = var_1283_groups_0, pad = var_1283_pad_0, pad_type = var_1283_pad_type_0, strides = var_1283_strides_0, weight = layers_5_fc1_inlier_module_weight_to_fp16_palettized, x = input_43_cast_fp16)[name = string("op_1283_cast_fp16")];
+            string var_1289_pad_type_0 = const()[name = string("op_1289_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1289_strides_0 = const()[name = string("op_1289_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1289_pad_0 = const()[name = string("op_1289_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1289_dilations_0 = const()[name = string("op_1289_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1289_groups_0 = const()[name = string("op_1289_groups_0"), val = int32(1)];
+            tensor<fp16, [5120, 1280, 1, 1]> layers_5_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [5120, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(89166464))), nonzero_data = tensor<fp16, [17005]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(89132352))))[name = string("layers_5_fc1_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 5120, 1, 1500]> var_1289_cast_fp16 = conv(dilations = var_1289_dilations_0, groups = var_1289_groups_0, pad = var_1289_pad_0, pad_type = var_1289_pad_type_0, strides = var_1289_strides_0, weight = layers_5_fc1_outlier_module_weight_to_fp16_sparsified, x = input_43_cast_fp16)[name = string("op_1289_cast_fp16")];
+            tensor<fp16, [1, 5120, 1, 1500]> input_45_cast_fp16 = add(x = var_1283_cast_fp16, y = var_1289_cast_fp16)[name = string("input_45_cast_fp16")];
+            string input_47_mode_0 = const()[name = string("input_47_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 5120, 1, 1500]> input_47_cast_fp16 = gelu(mode = input_47_mode_0, x = input_45_cast_fp16)[name = string("input_47_cast_fp16")];
+            string var_1300_pad_type_0 = const()[name = string("op_1300_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1300_strides_0 = const()[name = string("op_1300_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1300_pad_0 = const()[name = string("op_1300_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1300_dilations_0 = const()[name = string("op_1300_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1300_groups_0 = const()[name = string("op_1300_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 5120, 1, 1]> layers_5_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 5120, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(89985728))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(93262592))))[name = string("layers_5_fc2_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1280]> layers_5_fc2_inlier_module_bias_to_fp16 = const()[name = string("layers_5_fc2_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(93262720)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_1300_cast_fp16 = conv(bias = layers_5_fc2_inlier_module_bias_to_fp16, dilations = var_1300_dilations_0, groups = var_1300_groups_0, pad = var_1300_pad_0, pad_type = var_1300_pad_type_0, strides = var_1300_strides_0, weight = layers_5_fc2_inlier_module_weight_to_fp16_palettized, x = input_47_cast_fp16)[name = string("op_1300_cast_fp16")];
+            string var_1306_pad_type_0 = const()[name = string("op_1306_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1306_strides_0 = const()[name = string("op_1306_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1306_pad_0 = const()[name = string("op_1306_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1306_dilations_0 = const()[name = string("op_1306_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1306_groups_0 = const()[name = string("op_1306_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 5120, 1, 1]> layers_5_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 5120, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(93508032))), nonzero_data = tensor<fp16, [121288]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(93265344))))[name = string("layers_5_fc2_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_1306_cast_fp16 = conv(dilations = var_1306_dilations_0, groups = var_1306_groups_0, pad = var_1306_pad_0, pad_type = var_1306_pad_type_0, strides = var_1306_strides_0, weight = layers_5_fc2_outlier_module_weight_to_fp16_sparsified, x = input_47_cast_fp16)[name = string("op_1306_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> hidden_states_15_cast_fp16 = add(x = var_1300_cast_fp16, y = var_1306_cast_fp16)[name = string("hidden_states_15_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_25_cast_fp16 = add(x = inputs_23_cast_fp16, y = hidden_states_15_cast_fp16)[name = string("inputs_25_cast_fp16")];
+            int32 var_1316 = const()[name = string("op_1316"), val = int32(3)];
+            tensor<int32, [1]> out_25_axes_0 = const()[name = string("out_25_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_1335_to_fp16 = const()[name = string("op_1335_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> out_25_cast_fp16 = layer_norm(axes = out_25_axes_0, epsilon = var_1335_to_fp16, x = inputs_25_cast_fp16)[name = string("out_25_cast_fp16")];
+            tensor<fp16, [1280]> obj_25_gamma_0_to_fp16 = const()[name = string("obj_25_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(94327296)))];
+            tensor<fp16, [1280]> obj_25_beta_0_to_fp16 = const()[name = string("obj_25_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(94329920)))];
+            fp16 obj_25_epsilon_0_to_fp16 = const()[name = string("obj_25_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> obj_25_cast_fp16 = batch_norm(beta = obj_25_beta_0_to_fp16, epsilon = obj_25_epsilon_0_to_fp16, gamma = obj_25_gamma_0_to_fp16, mean = var_105_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_25_cast_fp16)[name = string("obj_25_cast_fp16")];
+            string var_1357_pad_type_0 = const()[name = string("op_1357_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1357_strides_0 = const()[name = string("op_1357_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1357_pad_0 = const()[name = string("op_1357_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1357_dilations_0 = const()[name = string("op_1357_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1357_groups_0 = const()[name = string("op_1357_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_6_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(94332544))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(95151808))))[name = string("layers_6_self_attn_q_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1280]> layers_6_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_6_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(95151936)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_1357_cast_fp16 = conv(bias = layers_6_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_1357_dilations_0, groups = var_1357_groups_0, pad = var_1357_pad_0, pad_type = var_1357_pad_type_0, strides = var_1357_strides_0, weight = layers_6_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_25_cast_fp16)[name = string("op_1357_cast_fp16")];
+            string var_1363_pad_type_0 = const()[name = string("op_1363_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1363_strides_0 = const()[name = string("op_1363_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1363_pad_0 = const()[name = string("op_1363_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1363_dilations_0 = const()[name = string("op_1363_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1363_groups_0 = const()[name = string("op_1363_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_6_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(95204224))), nonzero_data = tensor<fp16, [24786]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(95154560))))[name = string("layers_6_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_1363_cast_fp16 = conv(dilations = var_1363_dilations_0, groups = var_1363_groups_0, pad = var_1363_pad_0, pad_type = var_1363_pad_type_0, strides = var_1363_strides_0, weight = layers_6_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_25_cast_fp16)[name = string("op_1363_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> query_13_cast_fp16 = add(x = var_1357_cast_fp16, y = var_1363_cast_fp16)[name = string("query_13_cast_fp16")];
+            string var_1372_pad_type_0 = const()[name = string("op_1372_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1372_strides_0 = const()[name = string("op_1372_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1372_pad_0 = const()[name = string("op_1372_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1372_dilations_0 = const()[name = string("op_1372_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1372_groups_0 = const()[name = string("op_1372_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_6_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(95409088))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(96228352))))[name = string("layers_6_self_attn_k_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_1372_cast_fp16 = conv(dilations = var_1372_dilations_0, groups = var_1372_groups_0, pad = var_1372_pad_0, pad_type = var_1372_pad_type_0, strides = var_1372_strides_0, weight = layers_6_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_25_cast_fp16)[name = string("op_1372_cast_fp16")];
+            string var_1378_pad_type_0 = const()[name = string("op_1378_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1378_strides_0 = const()[name = string("op_1378_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1378_pad_0 = const()[name = string("op_1378_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1378_dilations_0 = const()[name = string("op_1378_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1378_groups_0 = const()[name = string("op_1378_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_6_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(96263168))), nonzero_data = tensor<fp16, [17281]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(96228480))))[name = string("layers_6_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_1378_cast_fp16 = conv(dilations = var_1378_dilations_0, groups = var_1378_groups_0, pad = var_1378_pad_0, pad_type = var_1378_pad_type_0, strides = var_1378_strides_0, weight = layers_6_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_25_cast_fp16)[name = string("op_1378_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> key_13_cast_fp16 = add(x = var_1372_cast_fp16, y = var_1378_cast_fp16)[name = string("key_13_cast_fp16")];
+            string var_1388_pad_type_0 = const()[name = string("op_1388_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1388_strides_0 = const()[name = string("op_1388_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1388_pad_0 = const()[name = string("op_1388_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1388_dilations_0 = const()[name = string("op_1388_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1388_groups_0 = const()[name = string("op_1388_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_6_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(96468032))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(97287296))))[name = string("layers_6_self_attn_v_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1280]> layers_6_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_6_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(97287424)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_1388_cast_fp16 = conv(bias = layers_6_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_1388_dilations_0, groups = var_1388_groups_0, pad = var_1388_pad_0, pad_type = var_1388_pad_type_0, strides = var_1388_strides_0, weight = layers_6_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_25_cast_fp16)[name = string("op_1388_cast_fp16")];
+            string var_1394_pad_type_0 = const()[name = string("op_1394_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1394_strides_0 = const()[name = string("op_1394_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1394_pad_0 = const()[name = string("op_1394_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1394_dilations_0 = const()[name = string("op_1394_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1394_groups_0 = const()[name = string("op_1394_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_6_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(97313920))), nonzero_data = tensor<fp16, [11874]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(97290048))))[name = string("layers_6_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_1394_cast_fp16 = conv(dilations = var_1394_dilations_0, groups = var_1394_groups_0, pad = var_1394_pad_0, pad_type = var_1394_pad_type_0, strides = var_1394_strides_0, weight = layers_6_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_25_cast_fp16)[name = string("op_1394_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> value_13_cast_fp16 = add(x = var_1388_cast_fp16, y = var_1394_cast_fp16)[name = string("value_13_cast_fp16")];
+            tensor<int32, [4]> var_1397 = const()[name = string("op_1397"), val = tensor<int32, [4]>([1, 20, 64, -1])];
+            tensor<fp16, [1, 20, 64, 1500]> mh_q_13_cast_fp16 = reshape(shape = var_1397, x = query_13_cast_fp16)[name = string("mh_q_13_cast_fp16")];
+            fp16 var_1399_to_fp16 = const()[name = string("op_1399_to_fp16"), val = fp16(0x1p-3)];
+            tensor<fp16, [1, 20, 64, 1500]> var_1400_cast_fp16 = mul(x = mh_q_13_cast_fp16, y = var_1399_to_fp16)[name = string("op_1400_cast_fp16")];
+            tensor<int32, [4]> var_1401 = const()[name = string("op_1401"), val = tensor<int32, [4]>([1, 20, 64, -1])];
+            tensor<fp16, [1, 20, 64, 1500]> var_1402_cast_fp16 = reshape(shape = var_1401, x = key_13_cast_fp16)[name = string("op_1402_cast_fp16")];
+            bool mh_w_13_transpose_x_0 = const()[name = string("mh_w_13_transpose_x_0"), val = bool(true)];
+            bool mh_w_13_transpose_y_0 = const()[name = string("mh_w_13_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, 1500, 1500]> mh_w_13_cast_fp16 = matmul(transpose_x = mh_w_13_transpose_x_0, transpose_y = mh_w_13_transpose_y_0, x = var_1400_cast_fp16, y = var_1402_cast_fp16)[name = string("mh_w_13_cast_fp16")];
+            tensor<fp16, [1, 20, 1500, 1500]> var_1405_cast_fp16 = softmax(axis = var_1316, x = mh_w_13_cast_fp16)[name = string("op_1405_cast_fp16")];
+            tensor<int32, [4]> var_1406 = const()[name = string("op_1406"), val = tensor<int32, [4]>([1, 20, 64, -1])];
+            tensor<fp16, [1, 20, 64, 1500]> var_1407_cast_fp16 = reshape(shape = var_1406, x = value_13_cast_fp16)[name = string("op_1407_cast_fp16")];
+            bool attn_13_transpose_x_0 = const()[name = string("attn_13_transpose_x_0"), val = bool(false)];
+            bool attn_13_transpose_y_0 = const()[name = string("attn_13_transpose_y_0"), val = bool(true)];
+            tensor<fp16, [1, 20, 64, 1500]> attn_13_cast_fp16 = matmul(transpose_x = attn_13_transpose_x_0, transpose_y = attn_13_transpose_y_0, x = var_1407_cast_fp16, y = var_1405_cast_fp16)[name = string("attn_13_cast_fp16")];
+            tensor<int32, [4]> var_1410 = const()[name = string("op_1410"), val = tensor<int32, [4]>([1, 1280, 1, -1])];
+            tensor<fp16, [1, 1280, 1, 1500]> input_49_cast_fp16 = reshape(shape = var_1410, x = attn_13_cast_fp16)[name = string("input_49_cast_fp16")];
+            string var_1420_pad_type_0 = const()[name = string("op_1420_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1420_strides_0 = const()[name = string("op_1420_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1420_pad_0 = const()[name = string("op_1420_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1420_dilations_0 = const()[name = string("op_1420_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1420_groups_0 = const()[name = string("op_1420_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_6_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(97518784))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(98338048))))[name = string("layers_6_self_attn_o_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1280]> layers_6_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_6_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(98338176)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_1420_cast_fp16 = conv(bias = layers_6_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_1420_dilations_0, groups = var_1420_groups_0, pad = var_1420_pad_0, pad_type = var_1420_pad_type_0, strides = var_1420_strides_0, weight = layers_6_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_49_cast_fp16)[name = string("op_1420_cast_fp16")];
+            string var_1426_pad_type_0 = const()[name = string("op_1426_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1426_strides_0 = const()[name = string("op_1426_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1426_pad_0 = const()[name = string("op_1426_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1426_dilations_0 = const()[name = string("op_1426_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1426_groups_0 = const()[name = string("op_1426_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_6_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(98362688))), nonzero_data = tensor<fp16, [10911]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(98340800))))[name = string("layers_6_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_1426_cast_fp16 = conv(dilations = var_1426_dilations_0, groups = var_1426_groups_0, pad = var_1426_pad_0, pad_type = var_1426_pad_type_0, strides = var_1426_strides_0, weight = layers_6_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_49_cast_fp16)[name = string("op_1426_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> obj_27_cast_fp16 = add(x = var_1420_cast_fp16, y = var_1426_cast_fp16)[name = string("obj_27_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_27_cast_fp16 = add(x = inputs_25_cast_fp16, y = obj_27_cast_fp16)[name = string("inputs_27_cast_fp16")];
+            tensor<int32, [1]> out_27_axes_0 = const()[name = string("out_27_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_1437_to_fp16 = const()[name = string("op_1437_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> out_27_cast_fp16 = layer_norm(axes = out_27_axes_0, epsilon = var_1437_to_fp16, x = inputs_27_cast_fp16)[name = string("out_27_cast_fp16")];
+            tensor<fp16, [1280]> input_51_gamma_0_to_fp16 = const()[name = string("input_51_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(98567552)))];
+            tensor<fp16, [1280]> input_51_beta_0_to_fp16 = const()[name = string("input_51_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(98570176)))];
+            fp16 input_51_epsilon_0_to_fp16 = const()[name = string("input_51_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_51_cast_fp16 = batch_norm(beta = input_51_beta_0_to_fp16, epsilon = input_51_epsilon_0_to_fp16, gamma = input_51_gamma_0_to_fp16, mean = var_105_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_27_cast_fp16)[name = string("input_51_cast_fp16")];
+            string var_1455_pad_type_0 = const()[name = string("op_1455_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1455_strides_0 = const()[name = string("op_1455_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1455_pad_0 = const()[name = string("op_1455_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1455_dilations_0 = const()[name = string("op_1455_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1455_groups_0 = const()[name = string("op_1455_groups_0"), val = int32(1)];
+            tensor<fp16, [5120, 1280, 1, 1]> layers_6_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [5120, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(98572800))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(101849664))))[name = string("layers_6_fc1_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [5120]> layers_6_fc1_inlier_module_bias_to_fp16 = const()[name = string("layers_6_fc1_inlier_module_bias_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(101849792)))];
+            tensor<fp16, [1, 5120, 1, 1500]> var_1455_cast_fp16 = conv(bias = layers_6_fc1_inlier_module_bias_to_fp16, dilations = var_1455_dilations_0, groups = var_1455_groups_0, pad = var_1455_pad_0, pad_type = var_1455_pad_type_0, strides = var_1455_strides_0, weight = layers_6_fc1_inlier_module_weight_to_fp16_palettized, x = input_51_cast_fp16)[name = string("op_1455_cast_fp16")];
+            string var_1461_pad_type_0 = const()[name = string("op_1461_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1461_strides_0 = const()[name = string("op_1461_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1461_pad_0 = const()[name = string("op_1461_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1461_dilations_0 = const()[name = string("op_1461_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1461_groups_0 = const()[name = string("op_1461_groups_0"), val = int32(1)];
+            tensor<fp16, [5120, 1280, 1, 1]> layers_6_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [5120, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(101881792))), nonzero_data = tensor<fp16, [10794]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(101860096))))[name = string("layers_6_fc1_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 5120, 1, 1500]> var_1461_cast_fp16 = conv(dilations = var_1461_dilations_0, groups = var_1461_groups_0, pad = var_1461_pad_0, pad_type = var_1461_pad_type_0, strides = var_1461_strides_0, weight = layers_6_fc1_outlier_module_weight_to_fp16_sparsified, x = input_51_cast_fp16)[name = string("op_1461_cast_fp16")];
+            tensor<fp16, [1, 5120, 1, 1500]> input_53_cast_fp16 = add(x = var_1455_cast_fp16, y = var_1461_cast_fp16)[name = string("input_53_cast_fp16")];
+            string input_55_mode_0 = const()[name = string("input_55_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 5120, 1, 1500]> input_55_cast_fp16 = gelu(mode = input_55_mode_0, x = input_53_cast_fp16)[name = string("input_55_cast_fp16")];
+            string var_1472_pad_type_0 = const()[name = string("op_1472_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1472_strides_0 = const()[name = string("op_1472_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1472_pad_0 = const()[name = string("op_1472_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1472_dilations_0 = const()[name = string("op_1472_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1472_groups_0 = const()[name = string("op_1472_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 5120, 1, 1]> layers_6_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 5120, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(102701056))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(105977920))))[name = string("layers_6_fc2_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1280]> layers_6_fc2_inlier_module_bias_to_fp16 = const()[name = string("layers_6_fc2_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(105978048)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_1472_cast_fp16 = conv(bias = layers_6_fc2_inlier_module_bias_to_fp16, dilations = var_1472_dilations_0, groups = var_1472_groups_0, pad = var_1472_pad_0, pad_type = var_1472_pad_type_0, strides = var_1472_strides_0, weight = layers_6_fc2_inlier_module_weight_to_fp16_palettized, x = input_55_cast_fp16)[name = string("op_1472_cast_fp16")];
+            string var_1478_pad_type_0 = const()[name = string("op_1478_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1478_strides_0 = const()[name = string("op_1478_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1478_pad_0 = const()[name = string("op_1478_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1478_dilations_0 = const()[name = string("op_1478_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1478_groups_0 = const()[name = string("op_1478_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 5120, 1, 1]> layers_6_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 5120, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(106227584))), nonzero_data = tensor<fp16, [123417]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(105980672))))[name = string("layers_6_fc2_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_1478_cast_fp16 = conv(dilations = var_1478_dilations_0, groups = var_1478_groups_0, pad = var_1478_pad_0, pad_type = var_1478_pad_type_0, strides = var_1478_strides_0, weight = layers_6_fc2_outlier_module_weight_to_fp16_sparsified, x = input_55_cast_fp16)[name = string("op_1478_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> hidden_states_17_cast_fp16 = add(x = var_1472_cast_fp16, y = var_1478_cast_fp16)[name = string("hidden_states_17_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_29_cast_fp16 = add(x = inputs_27_cast_fp16, y = hidden_states_17_cast_fp16)[name = string("inputs_29_cast_fp16")];
+            int32 var_1488 = const()[name = string("op_1488"), val = int32(3)];
+            tensor<int32, [1]> out_29_axes_0 = const()[name = string("out_29_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_1507_to_fp16 = const()[name = string("op_1507_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> out_29_cast_fp16 = layer_norm(axes = out_29_axes_0, epsilon = var_1507_to_fp16, x = inputs_29_cast_fp16)[name = string("out_29_cast_fp16")];
+            tensor<fp16, [1280]> obj_29_gamma_0_to_fp16 = const()[name = string("obj_29_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(107046848)))];
+            tensor<fp16, [1280]> obj_29_beta_0_to_fp16 = const()[name = string("obj_29_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(107049472)))];
+            fp16 obj_29_epsilon_0_to_fp16 = const()[name = string("obj_29_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> obj_29_cast_fp16 = batch_norm(beta = obj_29_beta_0_to_fp16, epsilon = obj_29_epsilon_0_to_fp16, gamma = obj_29_gamma_0_to_fp16, mean = var_105_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_29_cast_fp16)[name = string("obj_29_cast_fp16")];
+            string var_1529_pad_type_0 = const()[name = string("op_1529_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1529_strides_0 = const()[name = string("op_1529_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1529_pad_0 = const()[name = string("op_1529_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1529_dilations_0 = const()[name = string("op_1529_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1529_groups_0 = const()[name = string("op_1529_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_7_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(107052096))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(107871360))))[name = string("layers_7_self_attn_q_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1280]> layers_7_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_7_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(107871488)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_1529_cast_fp16 = conv(bias = layers_7_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_1529_dilations_0, groups = var_1529_groups_0, pad = var_1529_pad_0, pad_type = var_1529_pad_type_0, strides = var_1529_strides_0, weight = layers_7_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_29_cast_fp16)[name = string("op_1529_cast_fp16")];
+            string var_1535_pad_type_0 = const()[name = string("op_1535_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1535_strides_0 = const()[name = string("op_1535_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1535_pad_0 = const()[name = string("op_1535_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1535_dilations_0 = const()[name = string("op_1535_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1535_groups_0 = const()[name = string("op_1535_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_7_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(107923264))), nonzero_data = tensor<fp16, [24530]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(107874112))))[name = string("layers_7_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_1535_cast_fp16 = conv(dilations = var_1535_dilations_0, groups = var_1535_groups_0, pad = var_1535_pad_0, pad_type = var_1535_pad_type_0, strides = var_1535_strides_0, weight = layers_7_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_29_cast_fp16)[name = string("op_1535_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> query_15_cast_fp16 = add(x = var_1529_cast_fp16, y = var_1535_cast_fp16)[name = string("query_15_cast_fp16")];
+            string var_1544_pad_type_0 = const()[name = string("op_1544_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1544_strides_0 = const()[name = string("op_1544_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1544_pad_0 = const()[name = string("op_1544_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1544_dilations_0 = const()[name = string("op_1544_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1544_groups_0 = const()[name = string("op_1544_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_7_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(108128128))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(108947392))))[name = string("layers_7_self_attn_k_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_1544_cast_fp16 = conv(dilations = var_1544_dilations_0, groups = var_1544_groups_0, pad = var_1544_pad_0, pad_type = var_1544_pad_type_0, strides = var_1544_strides_0, weight = layers_7_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_29_cast_fp16)[name = string("op_1544_cast_fp16")];
+            string var_1550_pad_type_0 = const()[name = string("op_1550_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1550_strides_0 = const()[name = string("op_1550_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1550_pad_0 = const()[name = string("op_1550_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1550_dilations_0 = const()[name = string("op_1550_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1550_groups_0 = const()[name = string("op_1550_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_7_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(108983872))), nonzero_data = tensor<fp16, [18142]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(108947520))))[name = string("layers_7_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_1550_cast_fp16 = conv(dilations = var_1550_dilations_0, groups = var_1550_groups_0, pad = var_1550_pad_0, pad_type = var_1550_pad_type_0, strides = var_1550_strides_0, weight = layers_7_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_29_cast_fp16)[name = string("op_1550_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> key_15_cast_fp16 = add(x = var_1544_cast_fp16, y = var_1550_cast_fp16)[name = string("key_15_cast_fp16")];
+            string var_1560_pad_type_0 = const()[name = string("op_1560_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1560_strides_0 = const()[name = string("op_1560_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1560_pad_0 = const()[name = string("op_1560_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1560_dilations_0 = const()[name = string("op_1560_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1560_groups_0 = const()[name = string("op_1560_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_7_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(109188736))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(110008000))))[name = string("layers_7_self_attn_v_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1280]> layers_7_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_7_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(110008128)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_1560_cast_fp16 = conv(bias = layers_7_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_1560_dilations_0, groups = var_1560_groups_0, pad = var_1560_pad_0, pad_type = var_1560_pad_type_0, strides = var_1560_strides_0, weight = layers_7_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_29_cast_fp16)[name = string("op_1560_cast_fp16")];
+            string var_1566_pad_type_0 = const()[name = string("op_1566_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1566_strides_0 = const()[name = string("op_1566_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1566_pad_0 = const()[name = string("op_1566_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1566_dilations_0 = const()[name = string("op_1566_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1566_groups_0 = const()[name = string("op_1566_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_7_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(110034304))), nonzero_data = tensor<fp16, [11740]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(110010752))))[name = string("layers_7_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_1566_cast_fp16 = conv(dilations = var_1566_dilations_0, groups = var_1566_groups_0, pad = var_1566_pad_0, pad_type = var_1566_pad_type_0, strides = var_1566_strides_0, weight = layers_7_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_29_cast_fp16)[name = string("op_1566_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> value_15_cast_fp16 = add(x = var_1560_cast_fp16, y = var_1566_cast_fp16)[name = string("value_15_cast_fp16")];
+            tensor<int32, [4]> var_1569 = const()[name = string("op_1569"), val = tensor<int32, [4]>([1, 20, 64, -1])];
+            tensor<fp16, [1, 20, 64, 1500]> mh_q_15_cast_fp16 = reshape(shape = var_1569, x = query_15_cast_fp16)[name = string("mh_q_15_cast_fp16")];
+            fp16 var_1571_to_fp16 = const()[name = string("op_1571_to_fp16"), val = fp16(0x1p-3)];
+            tensor<fp16, [1, 20, 64, 1500]> var_1572_cast_fp16 = mul(x = mh_q_15_cast_fp16, y = var_1571_to_fp16)[name = string("op_1572_cast_fp16")];
+            tensor<int32, [4]> var_1573 = const()[name = string("op_1573"), val = tensor<int32, [4]>([1, 20, 64, -1])];
+            tensor<fp16, [1, 20, 64, 1500]> var_1574_cast_fp16 = reshape(shape = var_1573, x = key_15_cast_fp16)[name = string("op_1574_cast_fp16")];
+            bool mh_w_15_transpose_x_0 = const()[name = string("mh_w_15_transpose_x_0"), val = bool(true)];
+            bool mh_w_15_transpose_y_0 = const()[name = string("mh_w_15_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, 1500, 1500]> mh_w_15_cast_fp16 = matmul(transpose_x = mh_w_15_transpose_x_0, transpose_y = mh_w_15_transpose_y_0, x = var_1572_cast_fp16, y = var_1574_cast_fp16)[name = string("mh_w_15_cast_fp16")];
+            tensor<fp16, [1, 20, 1500, 1500]> var_1577_cast_fp16 = softmax(axis = var_1488, x = mh_w_15_cast_fp16)[name = string("op_1577_cast_fp16")];
+            tensor<int32, [4]> var_1578 = const()[name = string("op_1578"), val = tensor<int32, [4]>([1, 20, 64, -1])];
+            tensor<fp16, [1, 20, 64, 1500]> var_1579_cast_fp16 = reshape(shape = var_1578, x = value_15_cast_fp16)[name = string("op_1579_cast_fp16")];
+            bool attn_15_transpose_x_0 = const()[name = string("attn_15_transpose_x_0"), val = bool(false)];
+            bool attn_15_transpose_y_0 = const()[name = string("attn_15_transpose_y_0"), val = bool(true)];
+            tensor<fp16, [1, 20, 64, 1500]> attn_15_cast_fp16 = matmul(transpose_x = attn_15_transpose_x_0, transpose_y = attn_15_transpose_y_0, x = var_1579_cast_fp16, y = var_1577_cast_fp16)[name = string("attn_15_cast_fp16")];
+            tensor<int32, [4]> var_1582 = const()[name = string("op_1582"), val = tensor<int32, [4]>([1, 1280, 1, -1])];
+            tensor<fp16, [1, 1280, 1, 1500]> input_57_cast_fp16 = reshape(shape = var_1582, x = attn_15_cast_fp16)[name = string("input_57_cast_fp16")];
+            string var_1592_pad_type_0 = const()[name = string("op_1592_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1592_strides_0 = const()[name = string("op_1592_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1592_pad_0 = const()[name = string("op_1592_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1592_dilations_0 = const()[name = string("op_1592_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1592_groups_0 = const()[name = string("op_1592_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_7_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(110239168))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(111058432))))[name = string("layers_7_self_attn_o_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1280]> layers_7_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_7_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(111058560)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_1592_cast_fp16 = conv(bias = layers_7_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_1592_dilations_0, groups = var_1592_groups_0, pad = var_1592_pad_0, pad_type = var_1592_pad_type_0, strides = var_1592_strides_0, weight = layers_7_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_57_cast_fp16)[name = string("op_1592_cast_fp16")];
+            string var_1598_pad_type_0 = const()[name = string("op_1598_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1598_strides_0 = const()[name = string("op_1598_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1598_pad_0 = const()[name = string("op_1598_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1598_dilations_0 = const()[name = string("op_1598_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1598_groups_0 = const()[name = string("op_1598_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_7_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(111080064))), nonzero_data = tensor<fp16, [9395]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(111061184))))[name = string("layers_7_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_1598_cast_fp16 = conv(dilations = var_1598_dilations_0, groups = var_1598_groups_0, pad = var_1598_pad_0, pad_type = var_1598_pad_type_0, strides = var_1598_strides_0, weight = layers_7_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_57_cast_fp16)[name = string("op_1598_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> obj_31_cast_fp16 = add(x = var_1592_cast_fp16, y = var_1598_cast_fp16)[name = string("obj_31_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_31_cast_fp16 = add(x = inputs_29_cast_fp16, y = obj_31_cast_fp16)[name = string("inputs_31_cast_fp16")];
+            tensor<int32, [1]> out_31_axes_0 = const()[name = string("out_31_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_1609_to_fp16 = const()[name = string("op_1609_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> out_31_cast_fp16 = layer_norm(axes = out_31_axes_0, epsilon = var_1609_to_fp16, x = inputs_31_cast_fp16)[name = string("out_31_cast_fp16")];
+            tensor<fp16, [1280]> input_59_gamma_0_to_fp16 = const()[name = string("input_59_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(111284928)))];
+            tensor<fp16, [1280]> input_59_beta_0_to_fp16 = const()[name = string("input_59_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(111287552)))];
+            fp16 input_59_epsilon_0_to_fp16 = const()[name = string("input_59_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_59_cast_fp16 = batch_norm(beta = input_59_beta_0_to_fp16, epsilon = input_59_epsilon_0_to_fp16, gamma = input_59_gamma_0_to_fp16, mean = var_105_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_31_cast_fp16)[name = string("input_59_cast_fp16")];
+            string var_1627_pad_type_0 = const()[name = string("op_1627_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1627_strides_0 = const()[name = string("op_1627_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1627_pad_0 = const()[name = string("op_1627_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1627_dilations_0 = const()[name = string("op_1627_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1627_groups_0 = const()[name = string("op_1627_groups_0"), val = int32(1)];
+            tensor<fp16, [5120, 1280, 1, 1]> layers_7_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [5120, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(111290176))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(114567040))))[name = string("layers_7_fc1_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [5120]> layers_7_fc1_inlier_module_bias_to_fp16 = const()[name = string("layers_7_fc1_inlier_module_bias_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(114567168)))];
+            tensor<fp16, [1, 5120, 1, 1500]> var_1627_cast_fp16 = conv(bias = layers_7_fc1_inlier_module_bias_to_fp16, dilations = var_1627_dilations_0, groups = var_1627_groups_0, pad = var_1627_pad_0, pad_type = var_1627_pad_type_0, strides = var_1627_strides_0, weight = layers_7_fc1_inlier_module_weight_to_fp16_palettized, x = input_59_cast_fp16)[name = string("op_1627_cast_fp16")];
+            string var_1633_pad_type_0 = const()[name = string("op_1633_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1633_strides_0 = const()[name = string("op_1633_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1633_pad_0 = const()[name = string("op_1633_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1633_dilations_0 = const()[name = string("op_1633_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1633_groups_0 = const()[name = string("op_1633_groups_0"), val = int32(1)];
+            tensor<fp16, [5120, 1280, 1, 1]> layers_7_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [5120, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(114597440))), nonzero_data = tensor<fp16, [9922]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(114577472))))[name = string("layers_7_fc1_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 5120, 1, 1500]> var_1633_cast_fp16 = conv(dilations = var_1633_dilations_0, groups = var_1633_groups_0, pad = var_1633_pad_0, pad_type = var_1633_pad_type_0, strides = var_1633_strides_0, weight = layers_7_fc1_outlier_module_weight_to_fp16_sparsified, x = input_59_cast_fp16)[name = string("op_1633_cast_fp16")];
+            tensor<fp16, [1, 5120, 1, 1500]> input_61_cast_fp16 = add(x = var_1627_cast_fp16, y = var_1633_cast_fp16)[name = string("input_61_cast_fp16")];
+            string input_63_mode_0 = const()[name = string("input_63_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 5120, 1, 1500]> input_63_cast_fp16 = gelu(mode = input_63_mode_0, x = input_61_cast_fp16)[name = string("input_63_cast_fp16")];
+            string var_1644_pad_type_0 = const()[name = string("op_1644_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1644_strides_0 = const()[name = string("op_1644_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1644_pad_0 = const()[name = string("op_1644_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1644_dilations_0 = const()[name = string("op_1644_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1644_groups_0 = const()[name = string("op_1644_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 5120, 1, 1]> layers_7_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 5120, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(115416704))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(118693568))))[name = string("layers_7_fc2_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1280]> layers_7_fc2_inlier_module_bias_to_fp16 = const()[name = string("layers_7_fc2_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(118693696)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_1644_cast_fp16 = conv(bias = layers_7_fc2_inlier_module_bias_to_fp16, dilations = var_1644_dilations_0, groups = var_1644_groups_0, pad = var_1644_pad_0, pad_type = var_1644_pad_type_0, strides = var_1644_strides_0, weight = layers_7_fc2_inlier_module_weight_to_fp16_palettized, x = input_63_cast_fp16)[name = string("op_1644_cast_fp16")];
+            string var_1650_pad_type_0 = const()[name = string("op_1650_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1650_strides_0 = const()[name = string("op_1650_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1650_pad_0 = const()[name = string("op_1650_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1650_dilations_0 = const()[name = string("op_1650_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1650_groups_0 = const()[name = string("op_1650_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 5120, 1, 1]> layers_7_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 5120, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(118968000))), nonzero_data = tensor<fp16, [135797]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(118696320))))[name = string("layers_7_fc2_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_1650_cast_fp16 = conv(dilations = var_1650_dilations_0, groups = var_1650_groups_0, pad = var_1650_pad_0, pad_type = var_1650_pad_type_0, strides = var_1650_strides_0, weight = layers_7_fc2_outlier_module_weight_to_fp16_sparsified, x = input_63_cast_fp16)[name = string("op_1650_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> hidden_states_19_cast_fp16 = add(x = var_1644_cast_fp16, y = var_1650_cast_fp16)[name = string("hidden_states_19_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_33_cast_fp16 = add(x = inputs_31_cast_fp16, y = hidden_states_19_cast_fp16)[name = string("inputs_33_cast_fp16")];
+            int32 var_1660 = const()[name = string("op_1660"), val = int32(3)];
+            tensor<int32, [1]> out_33_axes_0 = const()[name = string("out_33_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_1679_to_fp16 = const()[name = string("op_1679_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> out_33_cast_fp16 = layer_norm(axes = out_33_axes_0, epsilon = var_1679_to_fp16, x = inputs_33_cast_fp16)[name = string("out_33_cast_fp16")];
+            tensor<fp16, [1280]> obj_33_gamma_0_to_fp16 = const()[name = string("obj_33_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(119787264)))];
+            tensor<fp16, [1280]> obj_33_beta_0_to_fp16 = const()[name = string("obj_33_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(119789888)))];
+            fp16 obj_33_epsilon_0_to_fp16 = const()[name = string("obj_33_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> obj_33_cast_fp16 = batch_norm(beta = obj_33_beta_0_to_fp16, epsilon = obj_33_epsilon_0_to_fp16, gamma = obj_33_gamma_0_to_fp16, mean = var_105_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_33_cast_fp16)[name = string("obj_33_cast_fp16")];
+            string var_1701_pad_type_0 = const()[name = string("op_1701_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1701_strides_0 = const()[name = string("op_1701_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1701_pad_0 = const()[name = string("op_1701_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1701_dilations_0 = const()[name = string("op_1701_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1701_groups_0 = const()[name = string("op_1701_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_8_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(119792512))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(120611776))))[name = string("layers_8_self_attn_q_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1280]> layers_8_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_8_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(120611904)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_1701_cast_fp16 = conv(bias = layers_8_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_1701_dilations_0, groups = var_1701_groups_0, pad = var_1701_pad_0, pad_type = var_1701_pad_type_0, strides = var_1701_strides_0, weight = layers_8_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_33_cast_fp16)[name = string("op_1701_cast_fp16")];
+            string var_1707_pad_type_0 = const()[name = string("op_1707_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1707_strides_0 = const()[name = string("op_1707_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1707_pad_0 = const()[name = string("op_1707_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1707_dilations_0 = const()[name = string("op_1707_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1707_groups_0 = const()[name = string("op_1707_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_8_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(120662848))), nonzero_data = tensor<fp16, [24111]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(120614528))))[name = string("layers_8_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_1707_cast_fp16 = conv(dilations = var_1707_dilations_0, groups = var_1707_groups_0, pad = var_1707_pad_0, pad_type = var_1707_pad_type_0, strides = var_1707_strides_0, weight = layers_8_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_33_cast_fp16)[name = string("op_1707_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> query_17_cast_fp16 = add(x = var_1701_cast_fp16, y = var_1707_cast_fp16)[name = string("query_17_cast_fp16")];
+            string var_1716_pad_type_0 = const()[name = string("op_1716_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1716_strides_0 = const()[name = string("op_1716_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1716_pad_0 = const()[name = string("op_1716_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1716_dilations_0 = const()[name = string("op_1716_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1716_groups_0 = const()[name = string("op_1716_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_8_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(120867712))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(121686976))))[name = string("layers_8_self_attn_k_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_1716_cast_fp16 = conv(dilations = var_1716_dilations_0, groups = var_1716_groups_0, pad = var_1716_pad_0, pad_type = var_1716_pad_type_0, strides = var_1716_strides_0, weight = layers_8_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_33_cast_fp16)[name = string("op_1716_cast_fp16")];
+            string var_1722_pad_type_0 = const()[name = string("op_1722_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1722_strides_0 = const()[name = string("op_1722_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1722_pad_0 = const()[name = string("op_1722_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1722_dilations_0 = const()[name = string("op_1722_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1722_groups_0 = const()[name = string("op_1722_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_8_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(121721856))), nonzero_data = tensor<fp16, [17344]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(121687104))))[name = string("layers_8_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_1722_cast_fp16 = conv(dilations = var_1722_dilations_0, groups = var_1722_groups_0, pad = var_1722_pad_0, pad_type = var_1722_pad_type_0, strides = var_1722_strides_0, weight = layers_8_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_33_cast_fp16)[name = string("op_1722_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> key_17_cast_fp16 = add(x = var_1716_cast_fp16, y = var_1722_cast_fp16)[name = string("key_17_cast_fp16")];
+            string var_1732_pad_type_0 = const()[name = string("op_1732_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1732_strides_0 = const()[name = string("op_1732_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1732_pad_0 = const()[name = string("op_1732_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1732_dilations_0 = const()[name = string("op_1732_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1732_groups_0 = const()[name = string("op_1732_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_8_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(121926720))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(122745984))))[name = string("layers_8_self_attn_v_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1280]> layers_8_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_8_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(122746112)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_1732_cast_fp16 = conv(bias = layers_8_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_1732_dilations_0, groups = var_1732_groups_0, pad = var_1732_pad_0, pad_type = var_1732_pad_type_0, strides = var_1732_strides_0, weight = layers_8_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_33_cast_fp16)[name = string("op_1732_cast_fp16")];
+            string var_1738_pad_type_0 = const()[name = string("op_1738_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1738_strides_0 = const()[name = string("op_1738_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1738_pad_0 = const()[name = string("op_1738_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1738_dilations_0 = const()[name = string("op_1738_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1738_groups_0 = const()[name = string("op_1738_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_8_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(122771968))), nonzero_data = tensor<fp16, [11555]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(122748736))))[name = string("layers_8_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_1738_cast_fp16 = conv(dilations = var_1738_dilations_0, groups = var_1738_groups_0, pad = var_1738_pad_0, pad_type = var_1738_pad_type_0, strides = var_1738_strides_0, weight = layers_8_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_33_cast_fp16)[name = string("op_1738_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> value_17_cast_fp16 = add(x = var_1732_cast_fp16, y = var_1738_cast_fp16)[name = string("value_17_cast_fp16")];
+            tensor<int32, [4]> var_1741 = const()[name = string("op_1741"), val = tensor<int32, [4]>([1, 20, 64, -1])];
+            tensor<fp16, [1, 20, 64, 1500]> mh_q_17_cast_fp16 = reshape(shape = var_1741, x = query_17_cast_fp16)[name = string("mh_q_17_cast_fp16")];
+            fp16 var_1743_to_fp16 = const()[name = string("op_1743_to_fp16"), val = fp16(0x1p-3)];
+            tensor<fp16, [1, 20, 64, 1500]> var_1744_cast_fp16 = mul(x = mh_q_17_cast_fp16, y = var_1743_to_fp16)[name = string("op_1744_cast_fp16")];
+            tensor<int32, [4]> var_1745 = const()[name = string("op_1745"), val = tensor<int32, [4]>([1, 20, 64, -1])];
+            tensor<fp16, [1, 20, 64, 1500]> var_1746_cast_fp16 = reshape(shape = var_1745, x = key_17_cast_fp16)[name = string("op_1746_cast_fp16")];
+            bool mh_w_17_transpose_x_0 = const()[name = string("mh_w_17_transpose_x_0"), val = bool(true)];
+            bool mh_w_17_transpose_y_0 = const()[name = string("mh_w_17_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, 1500, 1500]> mh_w_17_cast_fp16 = matmul(transpose_x = mh_w_17_transpose_x_0, transpose_y = mh_w_17_transpose_y_0, x = var_1744_cast_fp16, y = var_1746_cast_fp16)[name = string("mh_w_17_cast_fp16")];
+            tensor<fp16, [1, 20, 1500, 1500]> var_1749_cast_fp16 = softmax(axis = var_1660, x = mh_w_17_cast_fp16)[name = string("op_1749_cast_fp16")];
+            tensor<int32, [4]> var_1750 = const()[name = string("op_1750"), val = tensor<int32, [4]>([1, 20, 64, -1])];
+            tensor<fp16, [1, 20, 64, 1500]> var_1751_cast_fp16 = reshape(shape = var_1750, x = value_17_cast_fp16)[name = string("op_1751_cast_fp16")];
+            bool attn_17_transpose_x_0 = const()[name = string("attn_17_transpose_x_0"), val = bool(false)];
+            bool attn_17_transpose_y_0 = const()[name = string("attn_17_transpose_y_0"), val = bool(true)];
+            tensor<fp16, [1, 20, 64, 1500]> attn_17_cast_fp16 = matmul(transpose_x = attn_17_transpose_x_0, transpose_y = attn_17_transpose_y_0, x = var_1751_cast_fp16, y = var_1749_cast_fp16)[name = string("attn_17_cast_fp16")];
+            tensor<int32, [4]> var_1754 = const()[name = string("op_1754"), val = tensor<int32, [4]>([1, 1280, 1, -1])];
+            tensor<fp16, [1, 1280, 1, 1500]> input_65_cast_fp16 = reshape(shape = var_1754, x = attn_17_cast_fp16)[name = string("input_65_cast_fp16")];
+            string var_1764_pad_type_0 = const()[name = string("op_1764_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1764_strides_0 = const()[name = string("op_1764_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1764_pad_0 = const()[name = string("op_1764_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1764_dilations_0 = const()[name = string("op_1764_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1764_groups_0 = const()[name = string("op_1764_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_8_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(122976832))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(123796096))))[name = string("layers_8_self_attn_o_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1280]> layers_8_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_8_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(123796224)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_1764_cast_fp16 = conv(bias = layers_8_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_1764_dilations_0, groups = var_1764_groups_0, pad = var_1764_pad_0, pad_type = var_1764_pad_type_0, strides = var_1764_strides_0, weight = layers_8_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_65_cast_fp16)[name = string("op_1764_cast_fp16")];
+            string var_1770_pad_type_0 = const()[name = string("op_1770_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1770_strides_0 = const()[name = string("op_1770_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1770_pad_0 = const()[name = string("op_1770_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1770_dilations_0 = const()[name = string("op_1770_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1770_groups_0 = const()[name = string("op_1770_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_8_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(123819136))), nonzero_data = tensor<fp16, [10093]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(123798848))))[name = string("layers_8_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_1770_cast_fp16 = conv(dilations = var_1770_dilations_0, groups = var_1770_groups_0, pad = var_1770_pad_0, pad_type = var_1770_pad_type_0, strides = var_1770_strides_0, weight = layers_8_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_65_cast_fp16)[name = string("op_1770_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> obj_35_cast_fp16 = add(x = var_1764_cast_fp16, y = var_1770_cast_fp16)[name = string("obj_35_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_35_cast_fp16 = add(x = inputs_33_cast_fp16, y = obj_35_cast_fp16)[name = string("inputs_35_cast_fp16")];
+            tensor<int32, [1]> out_35_axes_0 = const()[name = string("out_35_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_1781_to_fp16 = const()[name = string("op_1781_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> out_35_cast_fp16 = layer_norm(axes = out_35_axes_0, epsilon = var_1781_to_fp16, x = inputs_35_cast_fp16)[name = string("out_35_cast_fp16")];
+            tensor<fp16, [1280]> input_67_gamma_0_to_fp16 = const()[name = string("input_67_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(124024000)))];
+            tensor<fp16, [1280]> input_67_beta_0_to_fp16 = const()[name = string("input_67_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(124026624)))];
+            fp16 input_67_epsilon_0_to_fp16 = const()[name = string("input_67_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_67_cast_fp16 = batch_norm(beta = input_67_beta_0_to_fp16, epsilon = input_67_epsilon_0_to_fp16, gamma = input_67_gamma_0_to_fp16, mean = var_105_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_35_cast_fp16)[name = string("input_67_cast_fp16")];
+            string var_1799_pad_type_0 = const()[name = string("op_1799_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1799_strides_0 = const()[name = string("op_1799_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1799_pad_0 = const()[name = string("op_1799_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1799_dilations_0 = const()[name = string("op_1799_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1799_groups_0 = const()[name = string("op_1799_groups_0"), val = int32(1)];
+            tensor<fp16, [5120, 1280, 1, 1]> layers_8_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [5120, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(124029248))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(127306112))))[name = string("layers_8_fc1_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [5120]> layers_8_fc1_inlier_module_bias_to_fp16 = const()[name = string("layers_8_fc1_inlier_module_bias_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(127306240)))];
+            tensor<fp16, [1, 5120, 1, 1500]> var_1799_cast_fp16 = conv(bias = layers_8_fc1_inlier_module_bias_to_fp16, dilations = var_1799_dilations_0, groups = var_1799_groups_0, pad = var_1799_pad_0, pad_type = var_1799_pad_type_0, strides = var_1799_strides_0, weight = layers_8_fc1_inlier_module_weight_to_fp16_palettized, x = input_67_cast_fp16)[name = string("op_1799_cast_fp16")];
+            string var_1805_pad_type_0 = const()[name = string("op_1805_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1805_strides_0 = const()[name = string("op_1805_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1805_pad_0 = const()[name = string("op_1805_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1805_dilations_0 = const()[name = string("op_1805_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1805_groups_0 = const()[name = string("op_1805_groups_0"), val = int32(1)];
+            tensor<fp16, [5120, 1280, 1, 1]> layers_8_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [5120, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(127346368))), nonzero_data = tensor<fp16, [14856]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(127316544))))[name = string("layers_8_fc1_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 5120, 1, 1500]> var_1805_cast_fp16 = conv(dilations = var_1805_dilations_0, groups = var_1805_groups_0, pad = var_1805_pad_0, pad_type = var_1805_pad_type_0, strides = var_1805_strides_0, weight = layers_8_fc1_outlier_module_weight_to_fp16_sparsified, x = input_67_cast_fp16)[name = string("op_1805_cast_fp16")];
+            tensor<fp16, [1, 5120, 1, 1500]> input_69_cast_fp16 = add(x = var_1799_cast_fp16, y = var_1805_cast_fp16)[name = string("input_69_cast_fp16")];
+            string input_71_mode_0 = const()[name = string("input_71_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 5120, 1, 1500]> input_71_cast_fp16 = gelu(mode = input_71_mode_0, x = input_69_cast_fp16)[name = string("input_71_cast_fp16")];
+            string var_1816_pad_type_0 = const()[name = string("op_1816_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1816_strides_0 = const()[name = string("op_1816_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1816_pad_0 = const()[name = string("op_1816_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1816_dilations_0 = const()[name = string("op_1816_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1816_groups_0 = const()[name = string("op_1816_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 5120, 1, 1]> layers_8_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 5120, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(128165632))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(131442496))))[name = string("layers_8_fc2_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1280]> layers_8_fc2_inlier_module_bias_to_fp16 = const()[name = string("layers_8_fc2_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(131442624)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_1816_cast_fp16 = conv(bias = layers_8_fc2_inlier_module_bias_to_fp16, dilations = var_1816_dilations_0, groups = var_1816_groups_0, pad = var_1816_pad_0, pad_type = var_1816_pad_type_0, strides = var_1816_strides_0, weight = layers_8_fc2_inlier_module_weight_to_fp16_palettized, x = input_71_cast_fp16)[name = string("op_1816_cast_fp16")];
+            string var_1822_pad_type_0 = const()[name = string("op_1822_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1822_strides_0 = const()[name = string("op_1822_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1822_pad_0 = const()[name = string("op_1822_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1822_dilations_0 = const()[name = string("op_1822_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1822_groups_0 = const()[name = string("op_1822_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 5120, 1, 1]> layers_8_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 5120, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(131699584))), nonzero_data = tensor<fp16, [127131]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(131445248))))[name = string("layers_8_fc2_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_1822_cast_fp16 = conv(dilations = var_1822_dilations_0, groups = var_1822_groups_0, pad = var_1822_pad_0, pad_type = var_1822_pad_type_0, strides = var_1822_strides_0, weight = layers_8_fc2_outlier_module_weight_to_fp16_sparsified, x = input_71_cast_fp16)[name = string("op_1822_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> hidden_states_21_cast_fp16 = add(x = var_1816_cast_fp16, y = var_1822_cast_fp16)[name = string("hidden_states_21_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_37_cast_fp16 = add(x = inputs_35_cast_fp16, y = hidden_states_21_cast_fp16)[name = string("inputs_37_cast_fp16")];
+            int32 var_1832 = const()[name = string("op_1832"), val = int32(3)];
+            tensor<int32, [1]> out_37_axes_0 = const()[name = string("out_37_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_1851_to_fp16 = const()[name = string("op_1851_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> out_37_cast_fp16 = layer_norm(axes = out_37_axes_0, epsilon = var_1851_to_fp16, x = inputs_37_cast_fp16)[name = string("out_37_cast_fp16")];
+            tensor<fp16, [1280]> obj_37_gamma_0_to_fp16 = const()[name = string("obj_37_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(132518848)))];
+            tensor<fp16, [1280]> obj_37_beta_0_to_fp16 = const()[name = string("obj_37_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(132521472)))];
+            fp16 obj_37_epsilon_0_to_fp16 = const()[name = string("obj_37_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> obj_37_cast_fp16 = batch_norm(beta = obj_37_beta_0_to_fp16, epsilon = obj_37_epsilon_0_to_fp16, gamma = obj_37_gamma_0_to_fp16, mean = var_105_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_37_cast_fp16)[name = string("obj_37_cast_fp16")];
+            string var_1873_pad_type_0 = const()[name = string("op_1873_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1873_strides_0 = const()[name = string("op_1873_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1873_pad_0 = const()[name = string("op_1873_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1873_dilations_0 = const()[name = string("op_1873_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1873_groups_0 = const()[name = string("op_1873_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_9_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(132524096))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(133343360))))[name = string("layers_9_self_attn_q_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1280]> layers_9_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_9_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(133343488)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_1873_cast_fp16 = conv(bias = layers_9_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_1873_dilations_0, groups = var_1873_groups_0, pad = var_1873_pad_0, pad_type = var_1873_pad_type_0, strides = var_1873_strides_0, weight = layers_9_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_37_cast_fp16)[name = string("op_1873_cast_fp16")];
+            string var_1879_pad_type_0 = const()[name = string("op_1879_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1879_strides_0 = const()[name = string("op_1879_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1879_pad_0 = const()[name = string("op_1879_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1879_dilations_0 = const()[name = string("op_1879_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1879_groups_0 = const()[name = string("op_1879_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_9_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(133392896))), nonzero_data = tensor<fp16, [23337]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(133346112))))[name = string("layers_9_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_1879_cast_fp16 = conv(dilations = var_1879_dilations_0, groups = var_1879_groups_0, pad = var_1879_pad_0, pad_type = var_1879_pad_type_0, strides = var_1879_strides_0, weight = layers_9_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_37_cast_fp16)[name = string("op_1879_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> query_19_cast_fp16 = add(x = var_1873_cast_fp16, y = var_1879_cast_fp16)[name = string("query_19_cast_fp16")];
+            string var_1888_pad_type_0 = const()[name = string("op_1888_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1888_strides_0 = const()[name = string("op_1888_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1888_pad_0 = const()[name = string("op_1888_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1888_dilations_0 = const()[name = string("op_1888_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1888_groups_0 = const()[name = string("op_1888_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_9_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(133597760))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(134417024))))[name = string("layers_9_self_attn_k_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_1888_cast_fp16 = conv(dilations = var_1888_dilations_0, groups = var_1888_groups_0, pad = var_1888_pad_0, pad_type = var_1888_pad_type_0, strides = var_1888_strides_0, weight = layers_9_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_37_cast_fp16)[name = string("op_1888_cast_fp16")];
+            string var_1894_pad_type_0 = const()[name = string("op_1894_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1894_strides_0 = const()[name = string("op_1894_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1894_pad_0 = const()[name = string("op_1894_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1894_dilations_0 = const()[name = string("op_1894_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1894_groups_0 = const()[name = string("op_1894_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_9_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(134451584))), nonzero_data = tensor<fp16, [17153]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(134417152))))[name = string("layers_9_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_1894_cast_fp16 = conv(dilations = var_1894_dilations_0, groups = var_1894_groups_0, pad = var_1894_pad_0, pad_type = var_1894_pad_type_0, strides = var_1894_strides_0, weight = layers_9_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_37_cast_fp16)[name = string("op_1894_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> key_19_cast_fp16 = add(x = var_1888_cast_fp16, y = var_1894_cast_fp16)[name = string("key_19_cast_fp16")];
+            string var_1904_pad_type_0 = const()[name = string("op_1904_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1904_strides_0 = const()[name = string("op_1904_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1904_pad_0 = const()[name = string("op_1904_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1904_dilations_0 = const()[name = string("op_1904_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1904_groups_0 = const()[name = string("op_1904_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_9_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(134656448))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(135475712))))[name = string("layers_9_self_attn_v_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1280]> layers_9_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_9_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(135475840)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_1904_cast_fp16 = conv(bias = layers_9_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_1904_dilations_0, groups = var_1904_groups_0, pad = var_1904_pad_0, pad_type = var_1904_pad_type_0, strides = var_1904_strides_0, weight = layers_9_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_37_cast_fp16)[name = string("op_1904_cast_fp16")];
+            string var_1910_pad_type_0 = const()[name = string("op_1910_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1910_strides_0 = const()[name = string("op_1910_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1910_pad_0 = const()[name = string("op_1910_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1910_dilations_0 = const()[name = string("op_1910_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1910_groups_0 = const()[name = string("op_1910_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_9_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(135500160))), nonzero_data = tensor<fp16, [10813]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(135478464))))[name = string("layers_9_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_1910_cast_fp16 = conv(dilations = var_1910_dilations_0, groups = var_1910_groups_0, pad = var_1910_pad_0, pad_type = var_1910_pad_type_0, strides = var_1910_strides_0, weight = layers_9_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_37_cast_fp16)[name = string("op_1910_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> value_19_cast_fp16 = add(x = var_1904_cast_fp16, y = var_1910_cast_fp16)[name = string("value_19_cast_fp16")];
+            tensor<int32, [4]> var_1913 = const()[name = string("op_1913"), val = tensor<int32, [4]>([1, 20, 64, -1])];
+            tensor<fp16, [1, 20, 64, 1500]> mh_q_19_cast_fp16 = reshape(shape = var_1913, x = query_19_cast_fp16)[name = string("mh_q_19_cast_fp16")];
+            fp16 var_1915_to_fp16 = const()[name = string("op_1915_to_fp16"), val = fp16(0x1p-3)];
+            tensor<fp16, [1, 20, 64, 1500]> var_1916_cast_fp16 = mul(x = mh_q_19_cast_fp16, y = var_1915_to_fp16)[name = string("op_1916_cast_fp16")];
+            tensor<int32, [4]> var_1917 = const()[name = string("op_1917"), val = tensor<int32, [4]>([1, 20, 64, -1])];
+            tensor<fp16, [1, 20, 64, 1500]> var_1918_cast_fp16 = reshape(shape = var_1917, x = key_19_cast_fp16)[name = string("op_1918_cast_fp16")];
+            bool mh_w_19_transpose_x_0 = const()[name = string("mh_w_19_transpose_x_0"), val = bool(true)];
+            bool mh_w_19_transpose_y_0 = const()[name = string("mh_w_19_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, 1500, 1500]> mh_w_19_cast_fp16 = matmul(transpose_x = mh_w_19_transpose_x_0, transpose_y = mh_w_19_transpose_y_0, x = var_1916_cast_fp16, y = var_1918_cast_fp16)[name = string("mh_w_19_cast_fp16")];
+            tensor<fp16, [1, 20, 1500, 1500]> var_1921_cast_fp16 = softmax(axis = var_1832, x = mh_w_19_cast_fp16)[name = string("op_1921_cast_fp16")];
+            tensor<int32, [4]> var_1922 = const()[name = string("op_1922"), val = tensor<int32, [4]>([1, 20, 64, -1])];
+            tensor<fp16, [1, 20, 64, 1500]> var_1923_cast_fp16 = reshape(shape = var_1922, x = value_19_cast_fp16)[name = string("op_1923_cast_fp16")];
+            bool attn_19_transpose_x_0 = const()[name = string("attn_19_transpose_x_0"), val = bool(false)];
+            bool attn_19_transpose_y_0 = const()[name = string("attn_19_transpose_y_0"), val = bool(true)];
+            tensor<fp16, [1, 20, 64, 1500]> attn_19_cast_fp16 = matmul(transpose_x = attn_19_transpose_x_0, transpose_y = attn_19_transpose_y_0, x = var_1923_cast_fp16, y = var_1921_cast_fp16)[name = string("attn_19_cast_fp16")];
+            tensor<int32, [4]> var_1926 = const()[name = string("op_1926"), val = tensor<int32, [4]>([1, 1280, 1, -1])];
+            tensor<fp16, [1, 1280, 1, 1500]> input_73_cast_fp16 = reshape(shape = var_1926, x = attn_19_cast_fp16)[name = string("input_73_cast_fp16")];
+            string var_1936_pad_type_0 = const()[name = string("op_1936_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1936_strides_0 = const()[name = string("op_1936_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1936_pad_0 = const()[name = string("op_1936_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1936_dilations_0 = const()[name = string("op_1936_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1936_groups_0 = const()[name = string("op_1936_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_9_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(135705024))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(136524288))))[name = string("layers_9_self_attn_o_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1280]> layers_9_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_9_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(136524416)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_1936_cast_fp16 = conv(bias = layers_9_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_1936_dilations_0, groups = var_1936_groups_0, pad = var_1936_pad_0, pad_type = var_1936_pad_type_0, strides = var_1936_strides_0, weight = layers_9_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_73_cast_fp16)[name = string("op_1936_cast_fp16")];
+            string var_1942_pad_type_0 = const()[name = string("op_1942_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1942_strides_0 = const()[name = string("op_1942_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1942_pad_0 = const()[name = string("op_1942_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1942_dilations_0 = const()[name = string("op_1942_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1942_groups_0 = const()[name = string("op_1942_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_9_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(136545344))), nonzero_data = tensor<fp16, [9107]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(136527040))))[name = string("layers_9_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_1942_cast_fp16 = conv(dilations = var_1942_dilations_0, groups = var_1942_groups_0, pad = var_1942_pad_0, pad_type = var_1942_pad_type_0, strides = var_1942_strides_0, weight = layers_9_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_73_cast_fp16)[name = string("op_1942_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> obj_39_cast_fp16 = add(x = var_1936_cast_fp16, y = var_1942_cast_fp16)[name = string("obj_39_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_39_cast_fp16 = add(x = inputs_37_cast_fp16, y = obj_39_cast_fp16)[name = string("inputs_39_cast_fp16")];
+            tensor<int32, [1]> out_39_axes_0 = const()[name = string("out_39_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_1953_to_fp16 = const()[name = string("op_1953_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> out_39_cast_fp16 = layer_norm(axes = out_39_axes_0, epsilon = var_1953_to_fp16, x = inputs_39_cast_fp16)[name = string("out_39_cast_fp16")];
+            tensor<fp16, [1280]> input_75_gamma_0_to_fp16 = const()[name = string("input_75_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(136750208)))];
+            tensor<fp16, [1280]> input_75_beta_0_to_fp16 = const()[name = string("input_75_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(136752832)))];
+            fp16 input_75_epsilon_0_to_fp16 = const()[name = string("input_75_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_75_cast_fp16 = batch_norm(beta = input_75_beta_0_to_fp16, epsilon = input_75_epsilon_0_to_fp16, gamma = input_75_gamma_0_to_fp16, mean = var_105_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_39_cast_fp16)[name = string("input_75_cast_fp16")];
+            string var_1971_pad_type_0 = const()[name = string("op_1971_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1971_strides_0 = const()[name = string("op_1971_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1971_pad_0 = const()[name = string("op_1971_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1971_dilations_0 = const()[name = string("op_1971_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1971_groups_0 = const()[name = string("op_1971_groups_0"), val = int32(1)];
+            tensor<fp16, [5120, 1280, 1, 1]> layers_9_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [5120, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(136755456))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(140032320))))[name = string("layers_9_fc1_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [5120]> layers_9_fc1_inlier_module_bias_to_fp16 = const()[name = string("layers_9_fc1_inlier_module_bias_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(140032448)))];
+            tensor<fp16, [1, 5120, 1, 1500]> var_1971_cast_fp16 = conv(bias = layers_9_fc1_inlier_module_bias_to_fp16, dilations = var_1971_dilations_0, groups = var_1971_groups_0, pad = var_1971_pad_0, pad_type = var_1971_pad_type_0, strides = var_1971_strides_0, weight = layers_9_fc1_inlier_module_weight_to_fp16_palettized, x = input_75_cast_fp16)[name = string("op_1971_cast_fp16")];
+            string var_1977_pad_type_0 = const()[name = string("op_1977_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1977_strides_0 = const()[name = string("op_1977_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1977_pad_0 = const()[name = string("op_1977_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1977_dilations_0 = const()[name = string("op_1977_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1977_groups_0 = const()[name = string("op_1977_groups_0"), val = int32(1)];
+            tensor<fp16, [5120, 1280, 1, 1]> layers_9_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [5120, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(140073152))), nonzero_data = tensor<fp16, [15166]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(140042752))))[name = string("layers_9_fc1_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 5120, 1, 1500]> var_1977_cast_fp16 = conv(dilations = var_1977_dilations_0, groups = var_1977_groups_0, pad = var_1977_pad_0, pad_type = var_1977_pad_type_0, strides = var_1977_strides_0, weight = layers_9_fc1_outlier_module_weight_to_fp16_sparsified, x = input_75_cast_fp16)[name = string("op_1977_cast_fp16")];
+            tensor<fp16, [1, 5120, 1, 1500]> input_77_cast_fp16 = add(x = var_1971_cast_fp16, y = var_1977_cast_fp16)[name = string("input_77_cast_fp16")];
+            string input_79_mode_0 = const()[name = string("input_79_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 5120, 1, 1500]> input_79_cast_fp16 = gelu(mode = input_79_mode_0, x = input_77_cast_fp16)[name = string("input_79_cast_fp16")];
+            string var_1988_pad_type_0 = const()[name = string("op_1988_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1988_strides_0 = const()[name = string("op_1988_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1988_pad_0 = const()[name = string("op_1988_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1988_dilations_0 = const()[name = string("op_1988_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1988_groups_0 = const()[name = string("op_1988_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 5120, 1, 1]> layers_9_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 5120, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(140892416))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(144169280))))[name = string("layers_9_fc2_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1280]> layers_9_fc2_inlier_module_bias_to_fp16 = const()[name = string("layers_9_fc2_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(144169408)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_1988_cast_fp16 = conv(bias = layers_9_fc2_inlier_module_bias_to_fp16, dilations = var_1988_dilations_0, groups = var_1988_groups_0, pad = var_1988_pad_0, pad_type = var_1988_pad_type_0, strides = var_1988_strides_0, weight = layers_9_fc2_inlier_module_weight_to_fp16_palettized, x = input_79_cast_fp16)[name = string("op_1988_cast_fp16")];
+            string var_1994_pad_type_0 = const()[name = string("op_1994_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1994_strides_0 = const()[name = string("op_1994_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1994_pad_0 = const()[name = string("op_1994_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1994_dilations_0 = const()[name = string("op_1994_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1994_groups_0 = const()[name = string("op_1994_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 5120, 1, 1]> layers_9_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 5120, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(144438144))), nonzero_data = tensor<fp16, [133022]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(144172032))))[name = string("layers_9_fc2_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_1994_cast_fp16 = conv(dilations = var_1994_dilations_0, groups = var_1994_groups_0, pad = var_1994_pad_0, pad_type = var_1994_pad_type_0, strides = var_1994_strides_0, weight = layers_9_fc2_outlier_module_weight_to_fp16_sparsified, x = input_79_cast_fp16)[name = string("op_1994_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> hidden_states_23_cast_fp16 = add(x = var_1988_cast_fp16, y = var_1994_cast_fp16)[name = string("hidden_states_23_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_41_cast_fp16 = add(x = inputs_39_cast_fp16, y = hidden_states_23_cast_fp16)[name = string("inputs_41_cast_fp16")];
+            int32 var_2004 = const()[name = string("op_2004"), val = int32(3)];
+            tensor<int32, [1]> out_41_axes_0 = const()[name = string("out_41_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_2023_to_fp16 = const()[name = string("op_2023_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> out_41_cast_fp16 = layer_norm(axes = out_41_axes_0, epsilon = var_2023_to_fp16, x = inputs_41_cast_fp16)[name = string("out_41_cast_fp16")];
+            tensor<fp16, [1280]> obj_41_gamma_0_to_fp16 = const()[name = string("obj_41_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(145257408)))];
+            tensor<fp16, [1280]> obj_41_beta_0_to_fp16 = const()[name = string("obj_41_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(145260032)))];
+            fp16 obj_41_epsilon_0_to_fp16 = const()[name = string("obj_41_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> obj_41_cast_fp16 = batch_norm(beta = obj_41_beta_0_to_fp16, epsilon = obj_41_epsilon_0_to_fp16, gamma = obj_41_gamma_0_to_fp16, mean = var_105_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_41_cast_fp16)[name = string("obj_41_cast_fp16")];
+            string var_2045_pad_type_0 = const()[name = string("op_2045_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2045_strides_0 = const()[name = string("op_2045_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2045_pad_0 = const()[name = string("op_2045_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2045_dilations_0 = const()[name = string("op_2045_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2045_groups_0 = const()[name = string("op_2045_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_10_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(145262656))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(146081920))))[name = string("layers_10_self_attn_q_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1280]> layers_10_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_10_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(146082048)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_2045_cast_fp16 = conv(bias = layers_10_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_2045_dilations_0, groups = var_2045_groups_0, pad = var_2045_pad_0, pad_type = var_2045_pad_type_0, strides = var_2045_strides_0, weight = layers_10_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_41_cast_fp16)[name = string("op_2045_cast_fp16")];
+            string var_2051_pad_type_0 = const()[name = string("op_2051_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2051_strides_0 = const()[name = string("op_2051_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2051_pad_0 = const()[name = string("op_2051_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2051_dilations_0 = const()[name = string("op_2051_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2051_groups_0 = const()[name = string("op_2051_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_10_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(146130624))), nonzero_data = tensor<fp16, [22915]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(146084672))))[name = string("layers_10_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_2051_cast_fp16 = conv(dilations = var_2051_dilations_0, groups = var_2051_groups_0, pad = var_2051_pad_0, pad_type = var_2051_pad_type_0, strides = var_2051_strides_0, weight = layers_10_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_41_cast_fp16)[name = string("op_2051_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> query_21_cast_fp16 = add(x = var_2045_cast_fp16, y = var_2051_cast_fp16)[name = string("query_21_cast_fp16")];
+            string var_2060_pad_type_0 = const()[name = string("op_2060_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2060_strides_0 = const()[name = string("op_2060_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2060_pad_0 = const()[name = string("op_2060_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2060_dilations_0 = const()[name = string("op_2060_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2060_groups_0 = const()[name = string("op_2060_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_10_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(146335488))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(147154752))))[name = string("layers_10_self_attn_k_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_2060_cast_fp16 = conv(dilations = var_2060_dilations_0, groups = var_2060_groups_0, pad = var_2060_pad_0, pad_type = var_2060_pad_type_0, strides = var_2060_strides_0, weight = layers_10_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_41_cast_fp16)[name = string("op_2060_cast_fp16")];
+            string var_2066_pad_type_0 = const()[name = string("op_2066_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2066_strides_0 = const()[name = string("op_2066_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2066_pad_0 = const()[name = string("op_2066_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2066_dilations_0 = const()[name = string("op_2066_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2066_groups_0 = const()[name = string("op_2066_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_10_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(147187392))), nonzero_data = tensor<fp16, [16203]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(147154880))))[name = string("layers_10_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_2066_cast_fp16 = conv(dilations = var_2066_dilations_0, groups = var_2066_groups_0, pad = var_2066_pad_0, pad_type = var_2066_pad_type_0, strides = var_2066_strides_0, weight = layers_10_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_41_cast_fp16)[name = string("op_2066_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> key_21_cast_fp16 = add(x = var_2060_cast_fp16, y = var_2066_cast_fp16)[name = string("key_21_cast_fp16")];
+            string var_2076_pad_type_0 = const()[name = string("op_2076_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2076_strides_0 = const()[name = string("op_2076_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2076_pad_0 = const()[name = string("op_2076_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2076_dilations_0 = const()[name = string("op_2076_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2076_groups_0 = const()[name = string("op_2076_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_10_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(147392256))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(148211520))))[name = string("layers_10_self_attn_v_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1280]> layers_10_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_10_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(148211648)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_2076_cast_fp16 = conv(bias = layers_10_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_2076_dilations_0, groups = var_2076_groups_0, pad = var_2076_pad_0, pad_type = var_2076_pad_type_0, strides = var_2076_strides_0, weight = layers_10_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_41_cast_fp16)[name = string("op_2076_cast_fp16")];
+            string var_2082_pad_type_0 = const()[name = string("op_2082_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2082_strides_0 = const()[name = string("op_2082_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2082_pad_0 = const()[name = string("op_2082_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2082_dilations_0 = const()[name = string("op_2082_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2082_groups_0 = const()[name = string("op_2082_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_10_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(148234688))), nonzero_data = tensor<fp16, [10173]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(148214272))))[name = string("layers_10_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_2082_cast_fp16 = conv(dilations = var_2082_dilations_0, groups = var_2082_groups_0, pad = var_2082_pad_0, pad_type = var_2082_pad_type_0, strides = var_2082_strides_0, weight = layers_10_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_41_cast_fp16)[name = string("op_2082_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> value_21_cast_fp16 = add(x = var_2076_cast_fp16, y = var_2082_cast_fp16)[name = string("value_21_cast_fp16")];
+            tensor<int32, [4]> var_2085 = const()[name = string("op_2085"), val = tensor<int32, [4]>([1, 20, 64, -1])];
+            tensor<fp16, [1, 20, 64, 1500]> mh_q_21_cast_fp16 = reshape(shape = var_2085, x = query_21_cast_fp16)[name = string("mh_q_21_cast_fp16")];
+            fp16 var_2087_to_fp16 = const()[name = string("op_2087_to_fp16"), val = fp16(0x1p-3)];
+            tensor<fp16, [1, 20, 64, 1500]> var_2088_cast_fp16 = mul(x = mh_q_21_cast_fp16, y = var_2087_to_fp16)[name = string("op_2088_cast_fp16")];
+            tensor<int32, [4]> var_2089 = const()[name = string("op_2089"), val = tensor<int32, [4]>([1, 20, 64, -1])];
+            tensor<fp16, [1, 20, 64, 1500]> var_2090_cast_fp16 = reshape(shape = var_2089, x = key_21_cast_fp16)[name = string("op_2090_cast_fp16")];
+            bool mh_w_21_transpose_x_0 = const()[name = string("mh_w_21_transpose_x_0"), val = bool(true)];
+            bool mh_w_21_transpose_y_0 = const()[name = string("mh_w_21_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, 1500, 1500]> mh_w_21_cast_fp16 = matmul(transpose_x = mh_w_21_transpose_x_0, transpose_y = mh_w_21_transpose_y_0, x = var_2088_cast_fp16, y = var_2090_cast_fp16)[name = string("mh_w_21_cast_fp16")];
+            tensor<fp16, [1, 20, 1500, 1500]> var_2093_cast_fp16 = softmax(axis = var_2004, x = mh_w_21_cast_fp16)[name = string("op_2093_cast_fp16")];
+            tensor<int32, [4]> var_2094 = const()[name = string("op_2094"), val = tensor<int32, [4]>([1, 20, 64, -1])];
+            tensor<fp16, [1, 20, 64, 1500]> var_2095_cast_fp16 = reshape(shape = var_2094, x = value_21_cast_fp16)[name = string("op_2095_cast_fp16")];
+            bool attn_21_transpose_x_0 = const()[name = string("attn_21_transpose_x_0"), val = bool(false)];
+            bool attn_21_transpose_y_0 = const()[name = string("attn_21_transpose_y_0"), val = bool(true)];
+            tensor<fp16, [1, 20, 64, 1500]> attn_21_cast_fp16 = matmul(transpose_x = attn_21_transpose_x_0, transpose_y = attn_21_transpose_y_0, x = var_2095_cast_fp16, y = var_2093_cast_fp16)[name = string("attn_21_cast_fp16")];
+            tensor<int32, [4]> var_2098 = const()[name = string("op_2098"), val = tensor<int32, [4]>([1, 1280, 1, -1])];
+            tensor<fp16, [1, 1280, 1, 1500]> input_81_cast_fp16 = reshape(shape = var_2098, x = attn_21_cast_fp16)[name = string("input_81_cast_fp16")];
+            string var_2108_pad_type_0 = const()[name = string("op_2108_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2108_strides_0 = const()[name = string("op_2108_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2108_pad_0 = const()[name = string("op_2108_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2108_dilations_0 = const()[name = string("op_2108_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2108_groups_0 = const()[name = string("op_2108_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_10_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(148439552))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(149258816))))[name = string("layers_10_self_attn_o_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1280]> layers_10_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_10_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(149258944)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_2108_cast_fp16 = conv(bias = layers_10_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_2108_dilations_0, groups = var_2108_groups_0, pad = var_2108_pad_0, pad_type = var_2108_pad_type_0, strides = var_2108_strides_0, weight = layers_10_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_81_cast_fp16)[name = string("op_2108_cast_fp16")];
+            string var_2114_pad_type_0 = const()[name = string("op_2114_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2114_strides_0 = const()[name = string("op_2114_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2114_pad_0 = const()[name = string("op_2114_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2114_dilations_0 = const()[name = string("op_2114_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2114_groups_0 = const()[name = string("op_2114_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_10_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(149280384))), nonzero_data = tensor<fp16, [9361]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(149261568))))[name = string("layers_10_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_2114_cast_fp16 = conv(dilations = var_2114_dilations_0, groups = var_2114_groups_0, pad = var_2114_pad_0, pad_type = var_2114_pad_type_0, strides = var_2114_strides_0, weight = layers_10_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_81_cast_fp16)[name = string("op_2114_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> obj_43_cast_fp16 = add(x = var_2108_cast_fp16, y = var_2114_cast_fp16)[name = string("obj_43_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_43_cast_fp16 = add(x = inputs_41_cast_fp16, y = obj_43_cast_fp16)[name = string("inputs_43_cast_fp16")];
+            tensor<int32, [1]> out_43_axes_0 = const()[name = string("out_43_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_2125_to_fp16 = const()[name = string("op_2125_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> out_43_cast_fp16 = layer_norm(axes = out_43_axes_0, epsilon = var_2125_to_fp16, x = inputs_43_cast_fp16)[name = string("out_43_cast_fp16")];
+            tensor<fp16, [1280]> input_83_gamma_0_to_fp16 = const()[name = string("input_83_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(149485248)))];
+            tensor<fp16, [1280]> input_83_beta_0_to_fp16 = const()[name = string("input_83_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(149487872)))];
+            fp16 input_83_epsilon_0_to_fp16 = const()[name = string("input_83_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_83_cast_fp16 = batch_norm(beta = input_83_beta_0_to_fp16, epsilon = input_83_epsilon_0_to_fp16, gamma = input_83_gamma_0_to_fp16, mean = var_105_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_43_cast_fp16)[name = string("input_83_cast_fp16")];
+            string var_2143_pad_type_0 = const()[name = string("op_2143_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2143_strides_0 = const()[name = string("op_2143_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2143_pad_0 = const()[name = string("op_2143_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2143_dilations_0 = const()[name = string("op_2143_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2143_groups_0 = const()[name = string("op_2143_groups_0"), val = int32(1)];
+            tensor<fp16, [5120, 1280, 1, 1]> layers_10_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [5120, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(149490496))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(152767360))))[name = string("layers_10_fc1_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [5120]> layers_10_fc1_inlier_module_bias_to_fp16 = const()[name = string("layers_10_fc1_inlier_module_bias_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(152767488)))];
+            tensor<fp16, [1, 5120, 1, 1500]> var_2143_cast_fp16 = conv(bias = layers_10_fc1_inlier_module_bias_to_fp16, dilations = var_2143_dilations_0, groups = var_2143_groups_0, pad = var_2143_pad_0, pad_type = var_2143_pad_type_0, strides = var_2143_strides_0, weight = layers_10_fc1_inlier_module_weight_to_fp16_palettized, x = input_83_cast_fp16)[name = string("op_2143_cast_fp16")];
+            string var_2149_pad_type_0 = const()[name = string("op_2149_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2149_strides_0 = const()[name = string("op_2149_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2149_pad_0 = const()[name = string("op_2149_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2149_dilations_0 = const()[name = string("op_2149_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2149_groups_0 = const()[name = string("op_2149_groups_0"), val = int32(1)];
+            tensor<fp16, [5120, 1280, 1, 1]> layers_10_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [5120, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(152808704))), nonzero_data = tensor<fp16, [15418]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(152777792))))[name = string("layers_10_fc1_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 5120, 1, 1500]> var_2149_cast_fp16 = conv(dilations = var_2149_dilations_0, groups = var_2149_groups_0, pad = var_2149_pad_0, pad_type = var_2149_pad_type_0, strides = var_2149_strides_0, weight = layers_10_fc1_outlier_module_weight_to_fp16_sparsified, x = input_83_cast_fp16)[name = string("op_2149_cast_fp16")];
+            tensor<fp16, [1, 5120, 1, 1500]> input_85_cast_fp16 = add(x = var_2143_cast_fp16, y = var_2149_cast_fp16)[name = string("input_85_cast_fp16")];
+            string input_87_mode_0 = const()[name = string("input_87_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 5120, 1, 1500]> input_87_cast_fp16 = gelu(mode = input_87_mode_0, x = input_85_cast_fp16)[name = string("input_87_cast_fp16")];
+            string var_2160_pad_type_0 = const()[name = string("op_2160_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2160_strides_0 = const()[name = string("op_2160_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2160_pad_0 = const()[name = string("op_2160_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2160_dilations_0 = const()[name = string("op_2160_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2160_groups_0 = const()[name = string("op_2160_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 5120, 1, 1]> layers_10_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 5120, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(153627968))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(156904832))))[name = string("layers_10_fc2_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1280]> layers_10_fc2_inlier_module_bias_to_fp16 = const()[name = string("layers_10_fc2_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(156904960)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_2160_cast_fp16 = conv(bias = layers_10_fc2_inlier_module_bias_to_fp16, dilations = var_2160_dilations_0, groups = var_2160_groups_0, pad = var_2160_pad_0, pad_type = var_2160_pad_type_0, strides = var_2160_strides_0, weight = layers_10_fc2_inlier_module_weight_to_fp16_palettized, x = input_87_cast_fp16)[name = string("op_2160_cast_fp16")];
+            string var_2166_pad_type_0 = const()[name = string("op_2166_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2166_strides_0 = const()[name = string("op_2166_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2166_pad_0 = const()[name = string("op_2166_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2166_dilations_0 = const()[name = string("op_2166_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2166_groups_0 = const()[name = string("op_2166_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 5120, 1, 1]> layers_10_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 5120, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(157157056))), nonzero_data = tensor<fp16, [124674]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(156907584))))[name = string("layers_10_fc2_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_2166_cast_fp16 = conv(dilations = var_2166_dilations_0, groups = var_2166_groups_0, pad = var_2166_pad_0, pad_type = var_2166_pad_type_0, strides = var_2166_strides_0, weight = layers_10_fc2_outlier_module_weight_to_fp16_sparsified, x = input_87_cast_fp16)[name = string("op_2166_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> hidden_states_25_cast_fp16 = add(x = var_2160_cast_fp16, y = var_2166_cast_fp16)[name = string("hidden_states_25_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_45_cast_fp16 = add(x = inputs_43_cast_fp16, y = hidden_states_25_cast_fp16)[name = string("inputs_45_cast_fp16")];
+            int32 var_2176 = const()[name = string("op_2176"), val = int32(3)];
+            tensor<int32, [1]> out_45_axes_0 = const()[name = string("out_45_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_2195_to_fp16 = const()[name = string("op_2195_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> out_45_cast_fp16 = layer_norm(axes = out_45_axes_0, epsilon = var_2195_to_fp16, x = inputs_45_cast_fp16)[name = string("out_45_cast_fp16")];
+            tensor<fp16, [1280]> obj_45_gamma_0_to_fp16 = const()[name = string("obj_45_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(157976320)))];
+            tensor<fp16, [1280]> obj_45_beta_0_to_fp16 = const()[name = string("obj_45_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(157978944)))];
+            fp16 obj_45_epsilon_0_to_fp16 = const()[name = string("obj_45_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> obj_45_cast_fp16 = batch_norm(beta = obj_45_beta_0_to_fp16, epsilon = obj_45_epsilon_0_to_fp16, gamma = obj_45_gamma_0_to_fp16, mean = var_105_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_45_cast_fp16)[name = string("obj_45_cast_fp16")];
+            string var_2217_pad_type_0 = const()[name = string("op_2217_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2217_strides_0 = const()[name = string("op_2217_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2217_pad_0 = const()[name = string("op_2217_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2217_dilations_0 = const()[name = string("op_2217_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2217_groups_0 = const()[name = string("op_2217_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_11_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(157981568))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(158800832))))[name = string("layers_11_self_attn_q_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1280]> layers_11_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_11_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(158800960)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_2217_cast_fp16 = conv(bias = layers_11_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_2217_dilations_0, groups = var_2217_groups_0, pad = var_2217_pad_0, pad_type = var_2217_pad_type_0, strides = var_2217_strides_0, weight = layers_11_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_45_cast_fp16)[name = string("op_2217_cast_fp16")];
+            string var_2223_pad_type_0 = const()[name = string("op_2223_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2223_strides_0 = const()[name = string("op_2223_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2223_pad_0 = const()[name = string("op_2223_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2223_dilations_0 = const()[name = string("op_2223_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2223_groups_0 = const()[name = string("op_2223_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_11_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(158852864))), nonzero_data = tensor<fp16, [24598]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(158803584))))[name = string("layers_11_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_2223_cast_fp16 = conv(dilations = var_2223_dilations_0, groups = var_2223_groups_0, pad = var_2223_pad_0, pad_type = var_2223_pad_type_0, strides = var_2223_strides_0, weight = layers_11_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_45_cast_fp16)[name = string("op_2223_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> query_23_cast_fp16 = add(x = var_2217_cast_fp16, y = var_2223_cast_fp16)[name = string("query_23_cast_fp16")];
+            string var_2232_pad_type_0 = const()[name = string("op_2232_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2232_strides_0 = const()[name = string("op_2232_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2232_pad_0 = const()[name = string("op_2232_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2232_dilations_0 = const()[name = string("op_2232_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2232_groups_0 = const()[name = string("op_2232_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_11_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(159057728))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(159876992))))[name = string("layers_11_self_attn_k_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_2232_cast_fp16 = conv(dilations = var_2232_dilations_0, groups = var_2232_groups_0, pad = var_2232_pad_0, pad_type = var_2232_pad_type_0, strides = var_2232_strides_0, weight = layers_11_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_45_cast_fp16)[name = string("op_2232_cast_fp16")];
+            string var_2238_pad_type_0 = const()[name = string("op_2238_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2238_strides_0 = const()[name = string("op_2238_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2238_pad_0 = const()[name = string("op_2238_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2238_dilations_0 = const()[name = string("op_2238_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2238_groups_0 = const()[name = string("op_2238_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_11_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(159906048))), nonzero_data = tensor<fp16, [14404]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(159877120))))[name = string("layers_11_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_2238_cast_fp16 = conv(dilations = var_2238_dilations_0, groups = var_2238_groups_0, pad = var_2238_pad_0, pad_type = var_2238_pad_type_0, strides = var_2238_strides_0, weight = layers_11_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_45_cast_fp16)[name = string("op_2238_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> key_23_cast_fp16 = add(x = var_2232_cast_fp16, y = var_2238_cast_fp16)[name = string("key_23_cast_fp16")];
+            string var_2248_pad_type_0 = const()[name = string("op_2248_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2248_strides_0 = const()[name = string("op_2248_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2248_pad_0 = const()[name = string("op_2248_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2248_dilations_0 = const()[name = string("op_2248_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2248_groups_0 = const()[name = string("op_2248_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_11_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(160110912))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(160930176))))[name = string("layers_11_self_attn_v_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1280]> layers_11_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_11_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(160930304)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_2248_cast_fp16 = conv(bias = layers_11_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_2248_dilations_0, groups = var_2248_groups_0, pad = var_2248_pad_0, pad_type = var_2248_pad_type_0, strides = var_2248_strides_0, weight = layers_11_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_45_cast_fp16)[name = string("op_2248_cast_fp16")];
+            string var_2254_pad_type_0 = const()[name = string("op_2254_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2254_strides_0 = const()[name = string("op_2254_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2254_pad_0 = const()[name = string("op_2254_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2254_dilations_0 = const()[name = string("op_2254_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2254_groups_0 = const()[name = string("op_2254_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_11_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(160953216))), nonzero_data = tensor<fp16, [10097]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(160932928))))[name = string("layers_11_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_2254_cast_fp16 = conv(dilations = var_2254_dilations_0, groups = var_2254_groups_0, pad = var_2254_pad_0, pad_type = var_2254_pad_type_0, strides = var_2254_strides_0, weight = layers_11_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_45_cast_fp16)[name = string("op_2254_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> value_23_cast_fp16 = add(x = var_2248_cast_fp16, y = var_2254_cast_fp16)[name = string("value_23_cast_fp16")];
+            tensor<int32, [4]> var_2257 = const()[name = string("op_2257"), val = tensor<int32, [4]>([1, 20, 64, -1])];
+            tensor<fp16, [1, 20, 64, 1500]> mh_q_23_cast_fp16 = reshape(shape = var_2257, x = query_23_cast_fp16)[name = string("mh_q_23_cast_fp16")];
+            fp16 var_2259_to_fp16 = const()[name = string("op_2259_to_fp16"), val = fp16(0x1p-3)];
+            tensor<fp16, [1, 20, 64, 1500]> var_2260_cast_fp16 = mul(x = mh_q_23_cast_fp16, y = var_2259_to_fp16)[name = string("op_2260_cast_fp16")];
+            tensor<int32, [4]> var_2261 = const()[name = string("op_2261"), val = tensor<int32, [4]>([1, 20, 64, -1])];
+            tensor<fp16, [1, 20, 64, 1500]> var_2262_cast_fp16 = reshape(shape = var_2261, x = key_23_cast_fp16)[name = string("op_2262_cast_fp16")];
+            bool mh_w_23_transpose_x_0 = const()[name = string("mh_w_23_transpose_x_0"), val = bool(true)];
+            bool mh_w_23_transpose_y_0 = const()[name = string("mh_w_23_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, 1500, 1500]> mh_w_23_cast_fp16 = matmul(transpose_x = mh_w_23_transpose_x_0, transpose_y = mh_w_23_transpose_y_0, x = var_2260_cast_fp16, y = var_2262_cast_fp16)[name = string("mh_w_23_cast_fp16")];
+            tensor<fp16, [1, 20, 1500, 1500]> var_2265_cast_fp16 = softmax(axis = var_2176, x = mh_w_23_cast_fp16)[name = string("op_2265_cast_fp16")];
+            tensor<int32, [4]> var_2266 = const()[name = string("op_2266"), val = tensor<int32, [4]>([1, 20, 64, -1])];
+            tensor<fp16, [1, 20, 64, 1500]> var_2267_cast_fp16 = reshape(shape = var_2266, x = value_23_cast_fp16)[name = string("op_2267_cast_fp16")];
+            bool attn_23_transpose_x_0 = const()[name = string("attn_23_transpose_x_0"), val = bool(false)];
+            bool attn_23_transpose_y_0 = const()[name = string("attn_23_transpose_y_0"), val = bool(true)];
+            tensor<fp16, [1, 20, 64, 1500]> attn_23_cast_fp16 = matmul(transpose_x = attn_23_transpose_x_0, transpose_y = attn_23_transpose_y_0, x = var_2267_cast_fp16, y = var_2265_cast_fp16)[name = string("attn_23_cast_fp16")];
+            tensor<int32, [4]> var_2270 = const()[name = string("op_2270"), val = tensor<int32, [4]>([1, 1280, 1, -1])];
+            tensor<fp16, [1, 1280, 1, 1500]> input_89_cast_fp16 = reshape(shape = var_2270, x = attn_23_cast_fp16)[name = string("input_89_cast_fp16")];
+            string var_2280_pad_type_0 = const()[name = string("op_2280_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2280_strides_0 = const()[name = string("op_2280_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2280_pad_0 = const()[name = string("op_2280_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2280_dilations_0 = const()[name = string("op_2280_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2280_groups_0 = const()[name = string("op_2280_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_11_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(161158080))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(161977344))))[name = string("layers_11_self_attn_o_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1280]> layers_11_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_11_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(161977472)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_2280_cast_fp16 = conv(bias = layers_11_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_2280_dilations_0, groups = var_2280_groups_0, pad = var_2280_pad_0, pad_type = var_2280_pad_type_0, strides = var_2280_strides_0, weight = layers_11_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_89_cast_fp16)[name = string("op_2280_cast_fp16")];
+            string var_2286_pad_type_0 = const()[name = string("op_2286_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2286_strides_0 = const()[name = string("op_2286_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2286_pad_0 = const()[name = string("op_2286_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2286_dilations_0 = const()[name = string("op_2286_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2286_groups_0 = const()[name = string("op_2286_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_11_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(161997632))), nonzero_data = tensor<fp16, [8713]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(161980096))))[name = string("layers_11_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_2286_cast_fp16 = conv(dilations = var_2286_dilations_0, groups = var_2286_groups_0, pad = var_2286_pad_0, pad_type = var_2286_pad_type_0, strides = var_2286_strides_0, weight = layers_11_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_89_cast_fp16)[name = string("op_2286_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> obj_47_cast_fp16 = add(x = var_2280_cast_fp16, y = var_2286_cast_fp16)[name = string("obj_47_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_47_cast_fp16 = add(x = inputs_45_cast_fp16, y = obj_47_cast_fp16)[name = string("inputs_47_cast_fp16")];
+            tensor<int32, [1]> out_47_axes_0 = const()[name = string("out_47_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_2297_to_fp16 = const()[name = string("op_2297_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> out_47_cast_fp16 = layer_norm(axes = out_47_axes_0, epsilon = var_2297_to_fp16, x = inputs_47_cast_fp16)[name = string("out_47_cast_fp16")];
+            tensor<fp16, [1280]> input_91_gamma_0_to_fp16 = const()[name = string("input_91_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(162202496)))];
+            tensor<fp16, [1280]> input_91_beta_0_to_fp16 = const()[name = string("input_91_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(162205120)))];
+            fp16 input_91_epsilon_0_to_fp16 = const()[name = string("input_91_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_91_cast_fp16 = batch_norm(beta = input_91_beta_0_to_fp16, epsilon = input_91_epsilon_0_to_fp16, gamma = input_91_gamma_0_to_fp16, mean = var_105_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_47_cast_fp16)[name = string("input_91_cast_fp16")];
+            string var_2315_pad_type_0 = const()[name = string("op_2315_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2315_strides_0 = const()[name = string("op_2315_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2315_pad_0 = const()[name = string("op_2315_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2315_dilations_0 = const()[name = string("op_2315_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2315_groups_0 = const()[name = string("op_2315_groups_0"), val = int32(1)];
+            tensor<fp16, [5120, 1280, 1, 1]> layers_11_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [5120, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(162207744))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(165484608))))[name = string("layers_11_fc1_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [5120]> layers_11_fc1_inlier_module_bias_to_fp16 = const()[name = string("layers_11_fc1_inlier_module_bias_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(165484736)))];
+            tensor<fp16, [1, 5120, 1, 1500]> var_2315_cast_fp16 = conv(bias = layers_11_fc1_inlier_module_bias_to_fp16, dilations = var_2315_dilations_0, groups = var_2315_groups_0, pad = var_2315_pad_0, pad_type = var_2315_pad_type_0, strides = var_2315_strides_0, weight = layers_11_fc1_inlier_module_weight_to_fp16_palettized, x = input_91_cast_fp16)[name = string("op_2315_cast_fp16")];
+            string var_2321_pad_type_0 = const()[name = string("op_2321_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2321_strides_0 = const()[name = string("op_2321_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2321_pad_0 = const()[name = string("op_2321_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2321_dilations_0 = const()[name = string("op_2321_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2321_groups_0 = const()[name = string("op_2321_groups_0"), val = int32(1)];
+            tensor<fp16, [5120, 1280, 1, 1]> layers_11_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [5120, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(165526016))), nonzero_data = tensor<fp16, [15430]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(165495040))))[name = string("layers_11_fc1_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 5120, 1, 1500]> var_2321_cast_fp16 = conv(dilations = var_2321_dilations_0, groups = var_2321_groups_0, pad = var_2321_pad_0, pad_type = var_2321_pad_type_0, strides = var_2321_strides_0, weight = layers_11_fc1_outlier_module_weight_to_fp16_sparsified, x = input_91_cast_fp16)[name = string("op_2321_cast_fp16")];
+            tensor<fp16, [1, 5120, 1, 1500]> input_93_cast_fp16 = add(x = var_2315_cast_fp16, y = var_2321_cast_fp16)[name = string("input_93_cast_fp16")];
+            string input_95_mode_0 = const()[name = string("input_95_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 5120, 1, 1500]> input_95_cast_fp16 = gelu(mode = input_95_mode_0, x = input_93_cast_fp16)[name = string("input_95_cast_fp16")];
+            string var_2332_pad_type_0 = const()[name = string("op_2332_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2332_strides_0 = const()[name = string("op_2332_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2332_pad_0 = const()[name = string("op_2332_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2332_dilations_0 = const()[name = string("op_2332_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2332_groups_0 = const()[name = string("op_2332_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 5120, 1, 1]> layers_11_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 5120, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(166345280))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(169622144))))[name = string("layers_11_fc2_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1280]> layers_11_fc2_inlier_module_bias_to_fp16 = const()[name = string("layers_11_fc2_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(169622272)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_2332_cast_fp16 = conv(bias = layers_11_fc2_inlier_module_bias_to_fp16, dilations = var_2332_dilations_0, groups = var_2332_groups_0, pad = var_2332_pad_0, pad_type = var_2332_pad_type_0, strides = var_2332_strides_0, weight = layers_11_fc2_inlier_module_weight_to_fp16_palettized, x = input_95_cast_fp16)[name = string("op_2332_cast_fp16")];
+            string var_2338_pad_type_0 = const()[name = string("op_2338_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2338_strides_0 = const()[name = string("op_2338_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2338_pad_0 = const()[name = string("op_2338_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2338_dilations_0 = const()[name = string("op_2338_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2338_groups_0 = const()[name = string("op_2338_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 5120, 1, 1]> layers_11_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 5120, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(169852352))), nonzero_data = tensor<fp16, [113683]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(169624896))))[name = string("layers_11_fc2_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_2338_cast_fp16 = conv(dilations = var_2338_dilations_0, groups = var_2338_groups_0, pad = var_2338_pad_0, pad_type = var_2338_pad_type_0, strides = var_2338_strides_0, weight = layers_11_fc2_outlier_module_weight_to_fp16_sparsified, x = input_95_cast_fp16)[name = string("op_2338_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> hidden_states_27_cast_fp16 = add(x = var_2332_cast_fp16, y = var_2338_cast_fp16)[name = string("hidden_states_27_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_49_cast_fp16 = add(x = inputs_47_cast_fp16, y = hidden_states_27_cast_fp16)[name = string("inputs_49_cast_fp16")];
+            int32 var_2348 = const()[name = string("op_2348"), val = int32(3)];
+            tensor<int32, [1]> out_49_axes_0 = const()[name = string("out_49_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_2367_to_fp16 = const()[name = string("op_2367_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> out_49_cast_fp16 = layer_norm(axes = out_49_axes_0, epsilon = var_2367_to_fp16, x = inputs_49_cast_fp16)[name = string("out_49_cast_fp16")];
+            tensor<fp16, [1280]> obj_49_gamma_0_to_fp16 = const()[name = string("obj_49_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(170671616)))];
+            tensor<fp16, [1280]> obj_49_beta_0_to_fp16 = const()[name = string("obj_49_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(170674240)))];
+            fp16 obj_49_epsilon_0_to_fp16 = const()[name = string("obj_49_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> obj_49_cast_fp16 = batch_norm(beta = obj_49_beta_0_to_fp16, epsilon = obj_49_epsilon_0_to_fp16, gamma = obj_49_gamma_0_to_fp16, mean = var_105_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_49_cast_fp16)[name = string("obj_49_cast_fp16")];
+            string var_2389_pad_type_0 = const()[name = string("op_2389_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2389_strides_0 = const()[name = string("op_2389_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2389_pad_0 = const()[name = string("op_2389_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2389_dilations_0 = const()[name = string("op_2389_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2389_groups_0 = const()[name = string("op_2389_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_12_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(170676864))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(171496128))))[name = string("layers_12_self_attn_q_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1280]> layers_12_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_12_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(171496256)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_2389_cast_fp16 = conv(bias = layers_12_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_2389_dilations_0, groups = var_2389_groups_0, pad = var_2389_pad_0, pad_type = var_2389_pad_type_0, strides = var_2389_strides_0, weight = layers_12_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_49_cast_fp16)[name = string("op_2389_cast_fp16")];
+            string var_2395_pad_type_0 = const()[name = string("op_2395_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2395_strides_0 = const()[name = string("op_2395_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2395_pad_0 = const()[name = string("op_2395_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2395_dilations_0 = const()[name = string("op_2395_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2395_groups_0 = const()[name = string("op_2395_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_12_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(171547328))), nonzero_data = tensor<fp16, [24167]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(171498880))))[name = string("layers_12_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_2395_cast_fp16 = conv(dilations = var_2395_dilations_0, groups = var_2395_groups_0, pad = var_2395_pad_0, pad_type = var_2395_pad_type_0, strides = var_2395_strides_0, weight = layers_12_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_49_cast_fp16)[name = string("op_2395_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> query_25_cast_fp16 = add(x = var_2389_cast_fp16, y = var_2395_cast_fp16)[name = string("query_25_cast_fp16")];
+            string var_2404_pad_type_0 = const()[name = string("op_2404_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2404_strides_0 = const()[name = string("op_2404_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2404_pad_0 = const()[name = string("op_2404_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2404_dilations_0 = const()[name = string("op_2404_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2404_groups_0 = const()[name = string("op_2404_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_12_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(171752192))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(172571456))))[name = string("layers_12_self_attn_k_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_2404_cast_fp16 = conv(dilations = var_2404_dilations_0, groups = var_2404_groups_0, pad = var_2404_pad_0, pad_type = var_2404_pad_type_0, strides = var_2404_strides_0, weight = layers_12_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_49_cast_fp16)[name = string("op_2404_cast_fp16")];
+            string var_2410_pad_type_0 = const()[name = string("op_2410_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2410_strides_0 = const()[name = string("op_2410_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2410_pad_0 = const()[name = string("op_2410_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2410_dilations_0 = const()[name = string("op_2410_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2410_groups_0 = const()[name = string("op_2410_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_12_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(172602112))), nonzero_data = tensor<fp16, [15212]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(172571584))))[name = string("layers_12_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_2410_cast_fp16 = conv(dilations = var_2410_dilations_0, groups = var_2410_groups_0, pad = var_2410_pad_0, pad_type = var_2410_pad_type_0, strides = var_2410_strides_0, weight = layers_12_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_49_cast_fp16)[name = string("op_2410_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> key_25_cast_fp16 = add(x = var_2404_cast_fp16, y = var_2410_cast_fp16)[name = string("key_25_cast_fp16")];
+            string var_2420_pad_type_0 = const()[name = string("op_2420_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2420_strides_0 = const()[name = string("op_2420_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2420_pad_0 = const()[name = string("op_2420_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2420_dilations_0 = const()[name = string("op_2420_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2420_groups_0 = const()[name = string("op_2420_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_12_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(172806976))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(173626240))))[name = string("layers_12_self_attn_v_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1280]> layers_12_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_12_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(173626368)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_2420_cast_fp16 = conv(bias = layers_12_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_2420_dilations_0, groups = var_2420_groups_0, pad = var_2420_pad_0, pad_type = var_2420_pad_type_0, strides = var_2420_strides_0, weight = layers_12_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_49_cast_fp16)[name = string("op_2420_cast_fp16")];
+            string var_2426_pad_type_0 = const()[name = string("op_2426_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2426_strides_0 = const()[name = string("op_2426_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2426_pad_0 = const()[name = string("op_2426_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2426_dilations_0 = const()[name = string("op_2426_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2426_groups_0 = const()[name = string("op_2426_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_12_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(173648128))), nonzero_data = tensor<fp16, [9512]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(173628992))))[name = string("layers_12_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_2426_cast_fp16 = conv(dilations = var_2426_dilations_0, groups = var_2426_groups_0, pad = var_2426_pad_0, pad_type = var_2426_pad_type_0, strides = var_2426_strides_0, weight = layers_12_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_49_cast_fp16)[name = string("op_2426_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> value_25_cast_fp16 = add(x = var_2420_cast_fp16, y = var_2426_cast_fp16)[name = string("value_25_cast_fp16")];
+            tensor<int32, [4]> var_2429 = const()[name = string("op_2429"), val = tensor<int32, [4]>([1, 20, 64, -1])];
+            tensor<fp16, [1, 20, 64, 1500]> mh_q_25_cast_fp16 = reshape(shape = var_2429, x = query_25_cast_fp16)[name = string("mh_q_25_cast_fp16")];
+            fp16 var_2431_to_fp16 = const()[name = string("op_2431_to_fp16"), val = fp16(0x1p-3)];
+            tensor<fp16, [1, 20, 64, 1500]> var_2432_cast_fp16 = mul(x = mh_q_25_cast_fp16, y = var_2431_to_fp16)[name = string("op_2432_cast_fp16")];
+            tensor<int32, [4]> var_2433 = const()[name = string("op_2433"), val = tensor<int32, [4]>([1, 20, 64, -1])];
+            tensor<fp16, [1, 20, 64, 1500]> var_2434_cast_fp16 = reshape(shape = var_2433, x = key_25_cast_fp16)[name = string("op_2434_cast_fp16")];
+            bool mh_w_25_transpose_x_0 = const()[name = string("mh_w_25_transpose_x_0"), val = bool(true)];
+            bool mh_w_25_transpose_y_0 = const()[name = string("mh_w_25_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, 1500, 1500]> mh_w_25_cast_fp16 = matmul(transpose_x = mh_w_25_transpose_x_0, transpose_y = mh_w_25_transpose_y_0, x = var_2432_cast_fp16, y = var_2434_cast_fp16)[name = string("mh_w_25_cast_fp16")];
+            tensor<fp16, [1, 20, 1500, 1500]> var_2437_cast_fp16 = softmax(axis = var_2348, x = mh_w_25_cast_fp16)[name = string("op_2437_cast_fp16")];
+            tensor<int32, [4]> var_2438 = const()[name = string("op_2438"), val = tensor<int32, [4]>([1, 20, 64, -1])];
+            tensor<fp16, [1, 20, 64, 1500]> var_2439_cast_fp16 = reshape(shape = var_2438, x = value_25_cast_fp16)[name = string("op_2439_cast_fp16")];
+            bool attn_25_transpose_x_0 = const()[name = string("attn_25_transpose_x_0"), val = bool(false)];
+            bool attn_25_transpose_y_0 = const()[name = string("attn_25_transpose_y_0"), val = bool(true)];
+            tensor<fp16, [1, 20, 64, 1500]> attn_25_cast_fp16 = matmul(transpose_x = attn_25_transpose_x_0, transpose_y = attn_25_transpose_y_0, x = var_2439_cast_fp16, y = var_2437_cast_fp16)[name = string("attn_25_cast_fp16")];
+            tensor<int32, [4]> var_2442 = const()[name = string("op_2442"), val = tensor<int32, [4]>([1, 1280, 1, -1])];
+            tensor<fp16, [1, 1280, 1, 1500]> input_97_cast_fp16 = reshape(shape = var_2442, x = attn_25_cast_fp16)[name = string("input_97_cast_fp16")];
+            string var_2452_pad_type_0 = const()[name = string("op_2452_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2452_strides_0 = const()[name = string("op_2452_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2452_pad_0 = const()[name = string("op_2452_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2452_dilations_0 = const()[name = string("op_2452_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2452_groups_0 = const()[name = string("op_2452_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_12_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(173852992))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(174672256))))[name = string("layers_12_self_attn_o_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1280]> layers_12_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_12_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(174672384)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_2452_cast_fp16 = conv(bias = layers_12_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_2452_dilations_0, groups = var_2452_groups_0, pad = var_2452_pad_0, pad_type = var_2452_pad_type_0, strides = var_2452_strides_0, weight = layers_12_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_97_cast_fp16)[name = string("op_2452_cast_fp16")];
+            string var_2458_pad_type_0 = const()[name = string("op_2458_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2458_strides_0 = const()[name = string("op_2458_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2458_pad_0 = const()[name = string("op_2458_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2458_dilations_0 = const()[name = string("op_2458_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2458_groups_0 = const()[name = string("op_2458_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_12_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(174690880))), nonzero_data = tensor<fp16, [7893]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(174675008))))[name = string("layers_12_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_2458_cast_fp16 = conv(dilations = var_2458_dilations_0, groups = var_2458_groups_0, pad = var_2458_pad_0, pad_type = var_2458_pad_type_0, strides = var_2458_strides_0, weight = layers_12_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_97_cast_fp16)[name = string("op_2458_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> obj_51_cast_fp16 = add(x = var_2452_cast_fp16, y = var_2458_cast_fp16)[name = string("obj_51_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_51_cast_fp16 = add(x = inputs_49_cast_fp16, y = obj_51_cast_fp16)[name = string("inputs_51_cast_fp16")];
+            tensor<int32, [1]> out_51_axes_0 = const()[name = string("out_51_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_2469_to_fp16 = const()[name = string("op_2469_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> out_51_cast_fp16 = layer_norm(axes = out_51_axes_0, epsilon = var_2469_to_fp16, x = inputs_51_cast_fp16)[name = string("out_51_cast_fp16")];
+            tensor<fp16, [1280]> input_99_gamma_0_to_fp16 = const()[name = string("input_99_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(174895744)))];
+            tensor<fp16, [1280]> input_99_beta_0_to_fp16 = const()[name = string("input_99_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(174898368)))];
+            fp16 input_99_epsilon_0_to_fp16 = const()[name = string("input_99_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_99_cast_fp16 = batch_norm(beta = input_99_beta_0_to_fp16, epsilon = input_99_epsilon_0_to_fp16, gamma = input_99_gamma_0_to_fp16, mean = var_105_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_51_cast_fp16)[name = string("input_99_cast_fp16")];
+            string var_2487_pad_type_0 = const()[name = string("op_2487_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2487_strides_0 = const()[name = string("op_2487_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2487_pad_0 = const()[name = string("op_2487_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2487_dilations_0 = const()[name = string("op_2487_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2487_groups_0 = const()[name = string("op_2487_groups_0"), val = int32(1)];
+            tensor<fp16, [5120, 1280, 1, 1]> layers_12_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [5120, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(174900992))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(178177856))))[name = string("layers_12_fc1_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [5120]> layers_12_fc1_inlier_module_bias_to_fp16 = const()[name = string("layers_12_fc1_inlier_module_bias_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(178177984)))];
+            tensor<fp16, [1, 5120, 1, 1500]> var_2487_cast_fp16 = conv(bias = layers_12_fc1_inlier_module_bias_to_fp16, dilations = var_2487_dilations_0, groups = var_2487_groups_0, pad = var_2487_pad_0, pad_type = var_2487_pad_type_0, strides = var_2487_strides_0, weight = layers_12_fc1_inlier_module_weight_to_fp16_palettized, x = input_99_cast_fp16)[name = string("op_2487_cast_fp16")];
+            string var_2493_pad_type_0 = const()[name = string("op_2493_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2493_strides_0 = const()[name = string("op_2493_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2493_pad_0 = const()[name = string("op_2493_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2493_dilations_0 = const()[name = string("op_2493_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2493_groups_0 = const()[name = string("op_2493_groups_0"), val = int32(1)];
+            tensor<fp16, [5120, 1280, 1, 1]> layers_12_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [5120, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(178232512))), nonzero_data = tensor<fp16, [22050]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(178188288))))[name = string("layers_12_fc1_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 5120, 1, 1500]> var_2493_cast_fp16 = conv(dilations = var_2493_dilations_0, groups = var_2493_groups_0, pad = var_2493_pad_0, pad_type = var_2493_pad_type_0, strides = var_2493_strides_0, weight = layers_12_fc1_outlier_module_weight_to_fp16_sparsified, x = input_99_cast_fp16)[name = string("op_2493_cast_fp16")];
+            tensor<fp16, [1, 5120, 1, 1500]> input_101_cast_fp16 = add(x = var_2487_cast_fp16, y = var_2493_cast_fp16)[name = string("input_101_cast_fp16")];
+            string input_103_mode_0 = const()[name = string("input_103_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 5120, 1, 1500]> input_103_cast_fp16 = gelu(mode = input_103_mode_0, x = input_101_cast_fp16)[name = string("input_103_cast_fp16")];
+            string var_2504_pad_type_0 = const()[name = string("op_2504_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2504_strides_0 = const()[name = string("op_2504_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2504_pad_0 = const()[name = string("op_2504_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2504_dilations_0 = const()[name = string("op_2504_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2504_groups_0 = const()[name = string("op_2504_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 5120, 1, 1]> layers_12_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 5120, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(179051776))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(182328640))))[name = string("layers_12_fc2_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1280]> layers_12_fc2_inlier_module_bias_to_fp16 = const()[name = string("layers_12_fc2_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(182328768)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_2504_cast_fp16 = conv(bias = layers_12_fc2_inlier_module_bias_to_fp16, dilations = var_2504_dilations_0, groups = var_2504_groups_0, pad = var_2504_pad_0, pad_type = var_2504_pad_type_0, strides = var_2504_strides_0, weight = layers_12_fc2_inlier_module_weight_to_fp16_palettized, x = input_103_cast_fp16)[name = string("op_2504_cast_fp16")];
+            string var_2510_pad_type_0 = const()[name = string("op_2510_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2510_strides_0 = const()[name = string("op_2510_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2510_pad_0 = const()[name = string("op_2510_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2510_dilations_0 = const()[name = string("op_2510_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2510_groups_0 = const()[name = string("op_2510_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 5120, 1, 1]> layers_12_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 5120, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(182509056))), nonzero_data = tensor<fp16, [88781]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(182331392))))[name = string("layers_12_fc2_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_2510_cast_fp16 = conv(dilations = var_2510_dilations_0, groups = var_2510_groups_0, pad = var_2510_pad_0, pad_type = var_2510_pad_type_0, strides = var_2510_strides_0, weight = layers_12_fc2_outlier_module_weight_to_fp16_sparsified, x = input_103_cast_fp16)[name = string("op_2510_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> hidden_states_29_cast_fp16 = add(x = var_2504_cast_fp16, y = var_2510_cast_fp16)[name = string("hidden_states_29_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_53_cast_fp16 = add(x = inputs_51_cast_fp16, y = hidden_states_29_cast_fp16)[name = string("inputs_53_cast_fp16")];
+            int32 var_2520 = const()[name = string("op_2520"), val = int32(3)];
+            tensor<int32, [1]> out_53_axes_0 = const()[name = string("out_53_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_2539_to_fp16 = const()[name = string("op_2539_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> out_53_cast_fp16 = layer_norm(axes = out_53_axes_0, epsilon = var_2539_to_fp16, x = inputs_53_cast_fp16)[name = string("out_53_cast_fp16")];
+            tensor<fp16, [1280]> obj_53_gamma_0_to_fp16 = const()[name = string("obj_53_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(183328320)))];
+            tensor<fp16, [1280]> obj_53_beta_0_to_fp16 = const()[name = string("obj_53_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(183330944)))];
+            fp16 obj_53_epsilon_0_to_fp16 = const()[name = string("obj_53_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> obj_53_cast_fp16 = batch_norm(beta = obj_53_beta_0_to_fp16, epsilon = obj_53_epsilon_0_to_fp16, gamma = obj_53_gamma_0_to_fp16, mean = var_105_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_53_cast_fp16)[name = string("obj_53_cast_fp16")];
+            string var_2561_pad_type_0 = const()[name = string("op_2561_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2561_strides_0 = const()[name = string("op_2561_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2561_pad_0 = const()[name = string("op_2561_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2561_dilations_0 = const()[name = string("op_2561_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2561_groups_0 = const()[name = string("op_2561_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_13_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(183333568))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(184152832))))[name = string("layers_13_self_attn_q_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1280]> layers_13_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_13_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(184152960)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_2561_cast_fp16 = conv(bias = layers_13_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_2561_dilations_0, groups = var_2561_groups_0, pad = var_2561_pad_0, pad_type = var_2561_pad_type_0, strides = var_2561_strides_0, weight = layers_13_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_53_cast_fp16)[name = string("op_2561_cast_fp16")];
+            string var_2567_pad_type_0 = const()[name = string("op_2567_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2567_strides_0 = const()[name = string("op_2567_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2567_pad_0 = const()[name = string("op_2567_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2567_dilations_0 = const()[name = string("op_2567_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2567_groups_0 = const()[name = string("op_2567_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_13_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(184209280))), nonzero_data = tensor<fp16, [26789]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(184155584))))[name = string("layers_13_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_2567_cast_fp16 = conv(dilations = var_2567_dilations_0, groups = var_2567_groups_0, pad = var_2567_pad_0, pad_type = var_2567_pad_type_0, strides = var_2567_strides_0, weight = layers_13_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_53_cast_fp16)[name = string("op_2567_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> query_27_cast_fp16 = add(x = var_2561_cast_fp16, y = var_2567_cast_fp16)[name = string("query_27_cast_fp16")];
+            string var_2576_pad_type_0 = const()[name = string("op_2576_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2576_strides_0 = const()[name = string("op_2576_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2576_pad_0 = const()[name = string("op_2576_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2576_dilations_0 = const()[name = string("op_2576_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2576_groups_0 = const()[name = string("op_2576_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_13_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(184414144))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(185233408))))[name = string("layers_13_self_attn_k_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_2576_cast_fp16 = conv(dilations = var_2576_dilations_0, groups = var_2576_groups_0, pad = var_2576_pad_0, pad_type = var_2576_pad_type_0, strides = var_2576_strides_0, weight = layers_13_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_53_cast_fp16)[name = string("op_2576_cast_fp16")];
+            string var_2582_pad_type_0 = const()[name = string("op_2582_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2582_strides_0 = const()[name = string("op_2582_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2582_pad_0 = const()[name = string("op_2582_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2582_dilations_0 = const()[name = string("op_2582_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2582_groups_0 = const()[name = string("op_2582_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_13_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(185265728))), nonzero_data = tensor<fp16, [16047]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(185233536))))[name = string("layers_13_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_2582_cast_fp16 = conv(dilations = var_2582_dilations_0, groups = var_2582_groups_0, pad = var_2582_pad_0, pad_type = var_2582_pad_type_0, strides = var_2582_strides_0, weight = layers_13_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_53_cast_fp16)[name = string("op_2582_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> key_27_cast_fp16 = add(x = var_2576_cast_fp16, y = var_2582_cast_fp16)[name = string("key_27_cast_fp16")];
+            string var_2592_pad_type_0 = const()[name = string("op_2592_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2592_strides_0 = const()[name = string("op_2592_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2592_pad_0 = const()[name = string("op_2592_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2592_dilations_0 = const()[name = string("op_2592_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2592_groups_0 = const()[name = string("op_2592_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_13_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(185470592))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(186289856))))[name = string("layers_13_self_attn_v_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1280]> layers_13_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_13_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(186289984)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_2592_cast_fp16 = conv(bias = layers_13_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_2592_dilations_0, groups = var_2592_groups_0, pad = var_2592_pad_0, pad_type = var_2592_pad_type_0, strides = var_2592_strides_0, weight = layers_13_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_53_cast_fp16)[name = string("op_2592_cast_fp16")];
+            string var_2598_pad_type_0 = const()[name = string("op_2598_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2598_strides_0 = const()[name = string("op_2598_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2598_pad_0 = const()[name = string("op_2598_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2598_dilations_0 = const()[name = string("op_2598_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2598_groups_0 = const()[name = string("op_2598_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_13_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(186311936))), nonzero_data = tensor<fp16, [9625]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(186292608))))[name = string("layers_13_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_2598_cast_fp16 = conv(dilations = var_2598_dilations_0, groups = var_2598_groups_0, pad = var_2598_pad_0, pad_type = var_2598_pad_type_0, strides = var_2598_strides_0, weight = layers_13_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_53_cast_fp16)[name = string("op_2598_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> value_27_cast_fp16 = add(x = var_2592_cast_fp16, y = var_2598_cast_fp16)[name = string("value_27_cast_fp16")];
+            tensor<int32, [4]> var_2601 = const()[name = string("op_2601"), val = tensor<int32, [4]>([1, 20, 64, -1])];
+            tensor<fp16, [1, 20, 64, 1500]> mh_q_27_cast_fp16 = reshape(shape = var_2601, x = query_27_cast_fp16)[name = string("mh_q_27_cast_fp16")];
+            fp16 var_2603_to_fp16 = const()[name = string("op_2603_to_fp16"), val = fp16(0x1p-3)];
+            tensor<fp16, [1, 20, 64, 1500]> var_2604_cast_fp16 = mul(x = mh_q_27_cast_fp16, y = var_2603_to_fp16)[name = string("op_2604_cast_fp16")];
+            tensor<int32, [4]> var_2605 = const()[name = string("op_2605"), val = tensor<int32, [4]>([1, 20, 64, -1])];
+            tensor<fp16, [1, 20, 64, 1500]> var_2606_cast_fp16 = reshape(shape = var_2605, x = key_27_cast_fp16)[name = string("op_2606_cast_fp16")];
+            bool mh_w_27_transpose_x_0 = const()[name = string("mh_w_27_transpose_x_0"), val = bool(true)];
+            bool mh_w_27_transpose_y_0 = const()[name = string("mh_w_27_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, 1500, 1500]> mh_w_27_cast_fp16 = matmul(transpose_x = mh_w_27_transpose_x_0, transpose_y = mh_w_27_transpose_y_0, x = var_2604_cast_fp16, y = var_2606_cast_fp16)[name = string("mh_w_27_cast_fp16")];
+            tensor<fp16, [1, 20, 1500, 1500]> var_2609_cast_fp16 = softmax(axis = var_2520, x = mh_w_27_cast_fp16)[name = string("op_2609_cast_fp16")];
+            tensor<int32, [4]> var_2610 = const()[name = string("op_2610"), val = tensor<int32, [4]>([1, 20, 64, -1])];
+            tensor<fp16, [1, 20, 64, 1500]> var_2611_cast_fp16 = reshape(shape = var_2610, x = value_27_cast_fp16)[name = string("op_2611_cast_fp16")];
+            bool attn_27_transpose_x_0 = const()[name = string("attn_27_transpose_x_0"), val = bool(false)];
+            bool attn_27_transpose_y_0 = const()[name = string("attn_27_transpose_y_0"), val = bool(true)];
+            tensor<fp16, [1, 20, 64, 1500]> attn_27_cast_fp16 = matmul(transpose_x = attn_27_transpose_x_0, transpose_y = attn_27_transpose_y_0, x = var_2611_cast_fp16, y = var_2609_cast_fp16)[name = string("attn_27_cast_fp16")];
+            tensor<int32, [4]> var_2614 = const()[name = string("op_2614"), val = tensor<int32, [4]>([1, 1280, 1, -1])];
+            tensor<fp16, [1, 1280, 1, 1500]> input_105_cast_fp16 = reshape(shape = var_2614, x = attn_27_cast_fp16)[name = string("input_105_cast_fp16")];
+            string var_2624_pad_type_0 = const()[name = string("op_2624_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2624_strides_0 = const()[name = string("op_2624_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2624_pad_0 = const()[name = string("op_2624_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2624_dilations_0 = const()[name = string("op_2624_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2624_groups_0 = const()[name = string("op_2624_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_13_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(186516800))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(187336064))))[name = string("layers_13_self_attn_o_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1280]> layers_13_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_13_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(187336192)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_2624_cast_fp16 = conv(bias = layers_13_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_2624_dilations_0, groups = var_2624_groups_0, pad = var_2624_pad_0, pad_type = var_2624_pad_type_0, strides = var_2624_strides_0, weight = layers_13_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_105_cast_fp16)[name = string("op_2624_cast_fp16")];
+            string var_2630_pad_type_0 = const()[name = string("op_2630_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2630_strides_0 = const()[name = string("op_2630_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2630_pad_0 = const()[name = string("op_2630_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2630_dilations_0 = const()[name = string("op_2630_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2630_groups_0 = const()[name = string("op_2630_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_13_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(187358784))), nonzero_data = tensor<fp16, [9926]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(187338816))))[name = string("layers_13_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_2630_cast_fp16 = conv(dilations = var_2630_dilations_0, groups = var_2630_groups_0, pad = var_2630_pad_0, pad_type = var_2630_pad_type_0, strides = var_2630_strides_0, weight = layers_13_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_105_cast_fp16)[name = string("op_2630_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> obj_55_cast_fp16 = add(x = var_2624_cast_fp16, y = var_2630_cast_fp16)[name = string("obj_55_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_55_cast_fp16 = add(x = inputs_53_cast_fp16, y = obj_55_cast_fp16)[name = string("inputs_55_cast_fp16")];
+            tensor<int32, [1]> out_55_axes_0 = const()[name = string("out_55_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_2641_to_fp16 = const()[name = string("op_2641_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> out_55_cast_fp16 = layer_norm(axes = out_55_axes_0, epsilon = var_2641_to_fp16, x = inputs_55_cast_fp16)[name = string("out_55_cast_fp16")];
+            tensor<fp16, [1280]> input_107_gamma_0_to_fp16 = const()[name = string("input_107_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(187563648)))];
+            tensor<fp16, [1280]> input_107_beta_0_to_fp16 = const()[name = string("input_107_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(187566272)))];
+            fp16 input_107_epsilon_0_to_fp16 = const()[name = string("input_107_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_107_cast_fp16 = batch_norm(beta = input_107_beta_0_to_fp16, epsilon = input_107_epsilon_0_to_fp16, gamma = input_107_gamma_0_to_fp16, mean = var_105_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_55_cast_fp16)[name = string("input_107_cast_fp16")];
+            string var_2659_pad_type_0 = const()[name = string("op_2659_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2659_strides_0 = const()[name = string("op_2659_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2659_pad_0 = const()[name = string("op_2659_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2659_dilations_0 = const()[name = string("op_2659_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2659_groups_0 = const()[name = string("op_2659_groups_0"), val = int32(1)];
+            tensor<fp16, [5120, 1280, 1, 1]> layers_13_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [5120, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(187568896))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(190845760))))[name = string("layers_13_fc1_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [5120]> layers_13_fc1_inlier_module_bias_to_fp16 = const()[name = string("layers_13_fc1_inlier_module_bias_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(190845888)))];
+            tensor<fp16, [1, 5120, 1, 1500]> var_2659_cast_fp16 = conv(bias = layers_13_fc1_inlier_module_bias_to_fp16, dilations = var_2659_dilations_0, groups = var_2659_groups_0, pad = var_2659_pad_0, pad_type = var_2659_pad_type_0, strides = var_2659_strides_0, weight = layers_13_fc1_inlier_module_weight_to_fp16_palettized, x = input_107_cast_fp16)[name = string("op_2659_cast_fp16")];
+            string var_2665_pad_type_0 = const()[name = string("op_2665_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2665_strides_0 = const()[name = string("op_2665_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2665_pad_0 = const()[name = string("op_2665_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2665_dilations_0 = const()[name = string("op_2665_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2665_groups_0 = const()[name = string("op_2665_groups_0"), val = int32(1)];
+            tensor<fp16, [5120, 1280, 1, 1]> layers_13_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [5120, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(190903232))), nonzero_data = tensor<fp16, [23480]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(190856192))))[name = string("layers_13_fc1_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 5120, 1, 1500]> var_2665_cast_fp16 = conv(dilations = var_2665_dilations_0, groups = var_2665_groups_0, pad = var_2665_pad_0, pad_type = var_2665_pad_type_0, strides = var_2665_strides_0, weight = layers_13_fc1_outlier_module_weight_to_fp16_sparsified, x = input_107_cast_fp16)[name = string("op_2665_cast_fp16")];
+            tensor<fp16, [1, 5120, 1, 1500]> input_109_cast_fp16 = add(x = var_2659_cast_fp16, y = var_2665_cast_fp16)[name = string("input_109_cast_fp16")];
+            string input_111_mode_0 = const()[name = string("input_111_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 5120, 1, 1500]> input_111_cast_fp16 = gelu(mode = input_111_mode_0, x = input_109_cast_fp16)[name = string("input_111_cast_fp16")];
+            string var_2676_pad_type_0 = const()[name = string("op_2676_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2676_strides_0 = const()[name = string("op_2676_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2676_pad_0 = const()[name = string("op_2676_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2676_dilations_0 = const()[name = string("op_2676_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2676_groups_0 = const()[name = string("op_2676_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 5120, 1, 1]> layers_13_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 5120, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(191722496))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(194999360))))[name = string("layers_13_fc2_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1280]> layers_13_fc2_inlier_module_bias_to_fp16 = const()[name = string("layers_13_fc2_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(194999488)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_2676_cast_fp16 = conv(bias = layers_13_fc2_inlier_module_bias_to_fp16, dilations = var_2676_dilations_0, groups = var_2676_groups_0, pad = var_2676_pad_0, pad_type = var_2676_pad_type_0, strides = var_2676_strides_0, weight = layers_13_fc2_inlier_module_weight_to_fp16_palettized, x = input_111_cast_fp16)[name = string("op_2676_cast_fp16")];
+            string var_2682_pad_type_0 = const()[name = string("op_2682_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2682_strides_0 = const()[name = string("op_2682_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2682_pad_0 = const()[name = string("op_2682_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2682_dilations_0 = const()[name = string("op_2682_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2682_groups_0 = const()[name = string("op_2682_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 5120, 1, 1]> layers_13_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 5120, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(195176960))), nonzero_data = tensor<fp16, [87377]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(195002112))))[name = string("layers_13_fc2_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_2682_cast_fp16 = conv(dilations = var_2682_dilations_0, groups = var_2682_groups_0, pad = var_2682_pad_0, pad_type = var_2682_pad_type_0, strides = var_2682_strides_0, weight = layers_13_fc2_outlier_module_weight_to_fp16_sparsified, x = input_111_cast_fp16)[name = string("op_2682_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> hidden_states_31_cast_fp16 = add(x = var_2676_cast_fp16, y = var_2682_cast_fp16)[name = string("hidden_states_31_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_57_cast_fp16 = add(x = inputs_55_cast_fp16, y = hidden_states_31_cast_fp16)[name = string("inputs_57_cast_fp16")];
+            int32 var_2692 = const()[name = string("op_2692"), val = int32(3)];
+            tensor<int32, [1]> out_57_axes_0 = const()[name = string("out_57_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_2711_to_fp16 = const()[name = string("op_2711_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> out_57_cast_fp16 = layer_norm(axes = out_57_axes_0, epsilon = var_2711_to_fp16, x = inputs_57_cast_fp16)[name = string("out_57_cast_fp16")];
+            tensor<fp16, [1280]> obj_57_gamma_0_to_fp16 = const()[name = string("obj_57_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(195996224)))];
+            tensor<fp16, [1280]> obj_57_beta_0_to_fp16 = const()[name = string("obj_57_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(195998848)))];
+            fp16 obj_57_epsilon_0_to_fp16 = const()[name = string("obj_57_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> obj_57_cast_fp16 = batch_norm(beta = obj_57_beta_0_to_fp16, epsilon = obj_57_epsilon_0_to_fp16, gamma = obj_57_gamma_0_to_fp16, mean = var_105_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_57_cast_fp16)[name = string("obj_57_cast_fp16")];
+            string var_2733_pad_type_0 = const()[name = string("op_2733_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2733_strides_0 = const()[name = string("op_2733_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2733_pad_0 = const()[name = string("op_2733_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2733_dilations_0 = const()[name = string("op_2733_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2733_groups_0 = const()[name = string("op_2733_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_14_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(196001472))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(196820736))))[name = string("layers_14_self_attn_q_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1280]> layers_14_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_14_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(196820864)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_2733_cast_fp16 = conv(bias = layers_14_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_2733_dilations_0, groups = var_2733_groups_0, pad = var_2733_pad_0, pad_type = var_2733_pad_type_0, strides = var_2733_strides_0, weight = layers_14_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_57_cast_fp16)[name = string("op_2733_cast_fp16")];
+            string var_2739_pad_type_0 = const()[name = string("op_2739_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2739_strides_0 = const()[name = string("op_2739_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2739_pad_0 = const()[name = string("op_2739_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2739_dilations_0 = const()[name = string("op_2739_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2739_groups_0 = const()[name = string("op_2739_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_14_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(196868352))), nonzero_data = tensor<fp16, [22369]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(196823488))))[name = string("layers_14_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_2739_cast_fp16 = conv(dilations = var_2739_dilations_0, groups = var_2739_groups_0, pad = var_2739_pad_0, pad_type = var_2739_pad_type_0, strides = var_2739_strides_0, weight = layers_14_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_57_cast_fp16)[name = string("op_2739_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> query_29_cast_fp16 = add(x = var_2733_cast_fp16, y = var_2739_cast_fp16)[name = string("query_29_cast_fp16")];
+            string var_2748_pad_type_0 = const()[name = string("op_2748_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2748_strides_0 = const()[name = string("op_2748_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2748_pad_0 = const()[name = string("op_2748_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2748_dilations_0 = const()[name = string("op_2748_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2748_groups_0 = const()[name = string("op_2748_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_14_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(197073216))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(197892480))))[name = string("layers_14_self_attn_k_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_2748_cast_fp16 = conv(dilations = var_2748_dilations_0, groups = var_2748_groups_0, pad = var_2748_pad_0, pad_type = var_2748_pad_type_0, strides = var_2748_strides_0, weight = layers_14_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_57_cast_fp16)[name = string("op_2748_cast_fp16")];
+            string var_2754_pad_type_0 = const()[name = string("op_2754_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2754_strides_0 = const()[name = string("op_2754_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2754_pad_0 = const()[name = string("op_2754_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2754_dilations_0 = const()[name = string("op_2754_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2754_groups_0 = const()[name = string("op_2754_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_14_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(197921344))), nonzero_data = tensor<fp16, [14324]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(197892608))))[name = string("layers_14_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_2754_cast_fp16 = conv(dilations = var_2754_dilations_0, groups = var_2754_groups_0, pad = var_2754_pad_0, pad_type = var_2754_pad_type_0, strides = var_2754_strides_0, weight = layers_14_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_57_cast_fp16)[name = string("op_2754_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> key_29_cast_fp16 = add(x = var_2748_cast_fp16, y = var_2754_cast_fp16)[name = string("key_29_cast_fp16")];
+            string var_2764_pad_type_0 = const()[name = string("op_2764_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2764_strides_0 = const()[name = string("op_2764_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2764_pad_0 = const()[name = string("op_2764_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2764_dilations_0 = const()[name = string("op_2764_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2764_groups_0 = const()[name = string("op_2764_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_14_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(198126208))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(198945472))))[name = string("layers_14_self_attn_v_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1280]> layers_14_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_14_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(198945600)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_2764_cast_fp16 = conv(bias = layers_14_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_2764_dilations_0, groups = var_2764_groups_0, pad = var_2764_pad_0, pad_type = var_2764_pad_type_0, strides = var_2764_strides_0, weight = layers_14_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_57_cast_fp16)[name = string("op_2764_cast_fp16")];
+            string var_2770_pad_type_0 = const()[name = string("op_2770_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2770_strides_0 = const()[name = string("op_2770_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2770_pad_0 = const()[name = string("op_2770_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2770_dilations_0 = const()[name = string("op_2770_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2770_groups_0 = const()[name = string("op_2770_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_14_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(198963968))), nonzero_data = tensor<fp16, [7837]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(198948224))))[name = string("layers_14_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_2770_cast_fp16 = conv(dilations = var_2770_dilations_0, groups = var_2770_groups_0, pad = var_2770_pad_0, pad_type = var_2770_pad_type_0, strides = var_2770_strides_0, weight = layers_14_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_57_cast_fp16)[name = string("op_2770_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> value_29_cast_fp16 = add(x = var_2764_cast_fp16, y = var_2770_cast_fp16)[name = string("value_29_cast_fp16")];
+            tensor<int32, [4]> var_2773 = const()[name = string("op_2773"), val = tensor<int32, [4]>([1, 20, 64, -1])];
+            tensor<fp16, [1, 20, 64, 1500]> mh_q_29_cast_fp16 = reshape(shape = var_2773, x = query_29_cast_fp16)[name = string("mh_q_29_cast_fp16")];
+            fp16 var_2775_to_fp16 = const()[name = string("op_2775_to_fp16"), val = fp16(0x1p-3)];
+            tensor<fp16, [1, 20, 64, 1500]> var_2776_cast_fp16 = mul(x = mh_q_29_cast_fp16, y = var_2775_to_fp16)[name = string("op_2776_cast_fp16")];
+            tensor<int32, [4]> var_2777 = const()[name = string("op_2777"), val = tensor<int32, [4]>([1, 20, 64, -1])];
+            tensor<fp16, [1, 20, 64, 1500]> var_2778_cast_fp16 = reshape(shape = var_2777, x = key_29_cast_fp16)[name = string("op_2778_cast_fp16")];
+            bool mh_w_29_transpose_x_0 = const()[name = string("mh_w_29_transpose_x_0"), val = bool(true)];
+            bool mh_w_29_transpose_y_0 = const()[name = string("mh_w_29_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, 1500, 1500]> mh_w_29_cast_fp16 = matmul(transpose_x = mh_w_29_transpose_x_0, transpose_y = mh_w_29_transpose_y_0, x = var_2776_cast_fp16, y = var_2778_cast_fp16)[name = string("mh_w_29_cast_fp16")];
+            tensor<fp16, [1, 20, 1500, 1500]> var_2781_cast_fp16 = softmax(axis = var_2692, x = mh_w_29_cast_fp16)[name = string("op_2781_cast_fp16")];
+            tensor<int32, [4]> var_2782 = const()[name = string("op_2782"), val = tensor<int32, [4]>([1, 20, 64, -1])];
+            tensor<fp16, [1, 20, 64, 1500]> var_2783_cast_fp16 = reshape(shape = var_2782, x = value_29_cast_fp16)[name = string("op_2783_cast_fp16")];
+            bool attn_29_transpose_x_0 = const()[name = string("attn_29_transpose_x_0"), val = bool(false)];
+            bool attn_29_transpose_y_0 = const()[name = string("attn_29_transpose_y_0"), val = bool(true)];
+            tensor<fp16, [1, 20, 64, 1500]> attn_29_cast_fp16 = matmul(transpose_x = attn_29_transpose_x_0, transpose_y = attn_29_transpose_y_0, x = var_2783_cast_fp16, y = var_2781_cast_fp16)[name = string("attn_29_cast_fp16")];
+            tensor<int32, [4]> var_2786 = const()[name = string("op_2786"), val = tensor<int32, [4]>([1, 1280, 1, -1])];
+            tensor<fp16, [1, 1280, 1, 1500]> input_113_cast_fp16 = reshape(shape = var_2786, x = attn_29_cast_fp16)[name = string("input_113_cast_fp16")];
+            string var_2796_pad_type_0 = const()[name = string("op_2796_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2796_strides_0 = const()[name = string("op_2796_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2796_pad_0 = const()[name = string("op_2796_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2796_dilations_0 = const()[name = string("op_2796_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2796_groups_0 = const()[name = string("op_2796_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_14_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(199168832))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(199988096))))[name = string("layers_14_self_attn_o_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1280]> layers_14_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_14_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(199988224)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_2796_cast_fp16 = conv(bias = layers_14_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_2796_dilations_0, groups = var_2796_groups_0, pad = var_2796_pad_0, pad_type = var_2796_pad_type_0, strides = var_2796_strides_0, weight = layers_14_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_113_cast_fp16)[name = string("op_2796_cast_fp16")];
+            string var_2802_pad_type_0 = const()[name = string("op_2802_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2802_strides_0 = const()[name = string("op_2802_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2802_pad_0 = const()[name = string("op_2802_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2802_dilations_0 = const()[name = string("op_2802_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2802_groups_0 = const()[name = string("op_2802_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_14_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(200005632))), nonzero_data = tensor<fp16, [7359]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(199990848))))[name = string("layers_14_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_2802_cast_fp16 = conv(dilations = var_2802_dilations_0, groups = var_2802_groups_0, pad = var_2802_pad_0, pad_type = var_2802_pad_type_0, strides = var_2802_strides_0, weight = layers_14_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_113_cast_fp16)[name = string("op_2802_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> obj_59_cast_fp16 = add(x = var_2796_cast_fp16, y = var_2802_cast_fp16)[name = string("obj_59_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_59_cast_fp16 = add(x = inputs_57_cast_fp16, y = obj_59_cast_fp16)[name = string("inputs_59_cast_fp16")];
+            tensor<int32, [1]> out_59_axes_0 = const()[name = string("out_59_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_2813_to_fp16 = const()[name = string("op_2813_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> out_59_cast_fp16 = layer_norm(axes = out_59_axes_0, epsilon = var_2813_to_fp16, x = inputs_59_cast_fp16)[name = string("out_59_cast_fp16")];
+            tensor<fp16, [1280]> input_115_gamma_0_to_fp16 = const()[name = string("input_115_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(200210496)))];
+            tensor<fp16, [1280]> input_115_beta_0_to_fp16 = const()[name = string("input_115_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(200213120)))];
+            fp16 input_115_epsilon_0_to_fp16 = const()[name = string("input_115_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_115_cast_fp16 = batch_norm(beta = input_115_beta_0_to_fp16, epsilon = input_115_epsilon_0_to_fp16, gamma = input_115_gamma_0_to_fp16, mean = var_105_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_59_cast_fp16)[name = string("input_115_cast_fp16")];
+            string var_2831_pad_type_0 = const()[name = string("op_2831_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2831_strides_0 = const()[name = string("op_2831_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2831_pad_0 = const()[name = string("op_2831_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2831_dilations_0 = const()[name = string("op_2831_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2831_groups_0 = const()[name = string("op_2831_groups_0"), val = int32(1)];
+            tensor<fp16, [5120, 1280, 1, 1]> layers_14_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [5120, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(200215744))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(203492608))))[name = string("layers_14_fc1_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [5120]> layers_14_fc1_inlier_module_bias_to_fp16 = const()[name = string("layers_14_fc1_inlier_module_bias_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(203492736)))];
+            tensor<fp16, [1, 5120, 1, 1500]> var_2831_cast_fp16 = conv(bias = layers_14_fc1_inlier_module_bias_to_fp16, dilations = var_2831_dilations_0, groups = var_2831_groups_0, pad = var_2831_pad_0, pad_type = var_2831_pad_type_0, strides = var_2831_strides_0, weight = layers_14_fc1_inlier_module_weight_to_fp16_palettized, x = input_115_cast_fp16)[name = string("op_2831_cast_fp16")];
+            string var_2837_pad_type_0 = const()[name = string("op_2837_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2837_strides_0 = const()[name = string("op_2837_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2837_pad_0 = const()[name = string("op_2837_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2837_dilations_0 = const()[name = string("op_2837_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2837_groups_0 = const()[name = string("op_2837_groups_0"), val = int32(1)];
+            tensor<fp16, [5120, 1280, 1, 1]> layers_14_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [5120, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(203552448))), nonzero_data = tensor<fp16, [24642]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(203503040))))[name = string("layers_14_fc1_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 5120, 1, 1500]> var_2837_cast_fp16 = conv(dilations = var_2837_dilations_0, groups = var_2837_groups_0, pad = var_2837_pad_0, pad_type = var_2837_pad_type_0, strides = var_2837_strides_0, weight = layers_14_fc1_outlier_module_weight_to_fp16_sparsified, x = input_115_cast_fp16)[name = string("op_2837_cast_fp16")];
+            tensor<fp16, [1, 5120, 1, 1500]> input_117_cast_fp16 = add(x = var_2831_cast_fp16, y = var_2837_cast_fp16)[name = string("input_117_cast_fp16")];
+            string input_119_mode_0 = const()[name = string("input_119_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 5120, 1, 1500]> input_119_cast_fp16 = gelu(mode = input_119_mode_0, x = input_117_cast_fp16)[name = string("input_119_cast_fp16")];
+            string var_2848_pad_type_0 = const()[name = string("op_2848_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2848_strides_0 = const()[name = string("op_2848_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2848_pad_0 = const()[name = string("op_2848_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2848_dilations_0 = const()[name = string("op_2848_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2848_groups_0 = const()[name = string("op_2848_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 5120, 1, 1]> layers_14_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 5120, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(204371712))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(207648576))))[name = string("layers_14_fc2_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1280]> layers_14_fc2_inlier_module_bias_to_fp16 = const()[name = string("layers_14_fc2_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(207648704)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_2848_cast_fp16 = conv(bias = layers_14_fc2_inlier_module_bias_to_fp16, dilations = var_2848_dilations_0, groups = var_2848_groups_0, pad = var_2848_pad_0, pad_type = var_2848_pad_type_0, strides = var_2848_strides_0, weight = layers_14_fc2_inlier_module_weight_to_fp16_palettized, x = input_119_cast_fp16)[name = string("op_2848_cast_fp16")];
+            string var_2854_pad_type_0 = const()[name = string("op_2854_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2854_strides_0 = const()[name = string("op_2854_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2854_pad_0 = const()[name = string("op_2854_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2854_dilations_0 = const()[name = string("op_2854_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2854_groups_0 = const()[name = string("op_2854_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 5120, 1, 1]> layers_14_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 5120, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(207800128))), nonzero_data = tensor<fp16, [74342]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(207651328))))[name = string("layers_14_fc2_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_2854_cast_fp16 = conv(dilations = var_2854_dilations_0, groups = var_2854_groups_0, pad = var_2854_pad_0, pad_type = var_2854_pad_type_0, strides = var_2854_strides_0, weight = layers_14_fc2_outlier_module_weight_to_fp16_sparsified, x = input_119_cast_fp16)[name = string("op_2854_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> hidden_states_33_cast_fp16 = add(x = var_2848_cast_fp16, y = var_2854_cast_fp16)[name = string("hidden_states_33_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_61_cast_fp16 = add(x = inputs_59_cast_fp16, y = hidden_states_33_cast_fp16)[name = string("inputs_61_cast_fp16")];
+            int32 var_2864 = const()[name = string("op_2864"), val = int32(3)];
+            tensor<int32, [1]> out_61_axes_0 = const()[name = string("out_61_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_2883_to_fp16 = const()[name = string("op_2883_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> out_61_cast_fp16 = layer_norm(axes = out_61_axes_0, epsilon = var_2883_to_fp16, x = inputs_61_cast_fp16)[name = string("out_61_cast_fp16")];
+            tensor<fp16, [1280]> obj_61_gamma_0_to_fp16 = const()[name = string("obj_61_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(208619392)))];
+            tensor<fp16, [1280]> obj_61_beta_0_to_fp16 = const()[name = string("obj_61_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(208622016)))];
+            fp16 obj_61_epsilon_0_to_fp16 = const()[name = string("obj_61_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> obj_61_cast_fp16 = batch_norm(beta = obj_61_beta_0_to_fp16, epsilon = obj_61_epsilon_0_to_fp16, gamma = obj_61_gamma_0_to_fp16, mean = var_105_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_61_cast_fp16)[name = string("obj_61_cast_fp16")];
+            string var_2905_pad_type_0 = const()[name = string("op_2905_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2905_strides_0 = const()[name = string("op_2905_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2905_pad_0 = const()[name = string("op_2905_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2905_dilations_0 = const()[name = string("op_2905_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2905_groups_0 = const()[name = string("op_2905_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_15_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(208624640))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(209443904))))[name = string("layers_15_self_attn_q_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1280]> layers_15_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_15_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(209444032)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_2905_cast_fp16 = conv(bias = layers_15_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_2905_dilations_0, groups = var_2905_groups_0, pad = var_2905_pad_0, pad_type = var_2905_pad_type_0, strides = var_2905_strides_0, weight = layers_15_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_61_cast_fp16)[name = string("op_2905_cast_fp16")];
+            string var_2911_pad_type_0 = const()[name = string("op_2911_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2911_strides_0 = const()[name = string("op_2911_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2911_pad_0 = const()[name = string("op_2911_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2911_dilations_0 = const()[name = string("op_2911_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2911_groups_0 = const()[name = string("op_2911_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_15_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(209492416))), nonzero_data = tensor<fp16, [22840]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(209446656))))[name = string("layers_15_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_2911_cast_fp16 = conv(dilations = var_2911_dilations_0, groups = var_2911_groups_0, pad = var_2911_pad_0, pad_type = var_2911_pad_type_0, strides = var_2911_strides_0, weight = layers_15_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_61_cast_fp16)[name = string("op_2911_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> query_31_cast_fp16 = add(x = var_2905_cast_fp16, y = var_2911_cast_fp16)[name = string("query_31_cast_fp16")];
+            string var_2920_pad_type_0 = const()[name = string("op_2920_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2920_strides_0 = const()[name = string("op_2920_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2920_pad_0 = const()[name = string("op_2920_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2920_dilations_0 = const()[name = string("op_2920_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2920_groups_0 = const()[name = string("op_2920_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_15_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(209697280))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(210516544))))[name = string("layers_15_self_attn_k_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_2920_cast_fp16 = conv(dilations = var_2920_dilations_0, groups = var_2920_groups_0, pad = var_2920_pad_0, pad_type = var_2920_pad_type_0, strides = var_2920_strides_0, weight = layers_15_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_61_cast_fp16)[name = string("op_2920_cast_fp16")];
+            string var_2926_pad_type_0 = const()[name = string("op_2926_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2926_strides_0 = const()[name = string("op_2926_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2926_pad_0 = const()[name = string("op_2926_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2926_dilations_0 = const()[name = string("op_2926_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2926_groups_0 = const()[name = string("op_2926_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_15_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(210542144))), nonzero_data = tensor<fp16, [12688]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(210516672))))[name = string("layers_15_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_2926_cast_fp16 = conv(dilations = var_2926_dilations_0, groups = var_2926_groups_0, pad = var_2926_pad_0, pad_type = var_2926_pad_type_0, strides = var_2926_strides_0, weight = layers_15_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_61_cast_fp16)[name = string("op_2926_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> key_31_cast_fp16 = add(x = var_2920_cast_fp16, y = var_2926_cast_fp16)[name = string("key_31_cast_fp16")];
+            string var_2936_pad_type_0 = const()[name = string("op_2936_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2936_strides_0 = const()[name = string("op_2936_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2936_pad_0 = const()[name = string("op_2936_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2936_dilations_0 = const()[name = string("op_2936_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2936_groups_0 = const()[name = string("op_2936_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_15_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(210747008))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(211566272))))[name = string("layers_15_self_attn_v_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1280]> layers_15_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_15_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(211566400)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_2936_cast_fp16 = conv(bias = layers_15_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_2936_dilations_0, groups = var_2936_groups_0, pad = var_2936_pad_0, pad_type = var_2936_pad_type_0, strides = var_2936_strides_0, weight = layers_15_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_61_cast_fp16)[name = string("op_2936_cast_fp16")];
+            string var_2942_pad_type_0 = const()[name = string("op_2942_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2942_strides_0 = const()[name = string("op_2942_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2942_pad_0 = const()[name = string("op_2942_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2942_dilations_0 = const()[name = string("op_2942_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2942_groups_0 = const()[name = string("op_2942_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_15_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(211586240))), nonzero_data = tensor<fp16, [8563]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(211569024))))[name = string("layers_15_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_2942_cast_fp16 = conv(dilations = var_2942_dilations_0, groups = var_2942_groups_0, pad = var_2942_pad_0, pad_type = var_2942_pad_type_0, strides = var_2942_strides_0, weight = layers_15_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_61_cast_fp16)[name = string("op_2942_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> value_31_cast_fp16 = add(x = var_2936_cast_fp16, y = var_2942_cast_fp16)[name = string("value_31_cast_fp16")];
+            tensor<int32, [4]> var_2945 = const()[name = string("op_2945"), val = tensor<int32, [4]>([1, 20, 64, -1])];
+            tensor<fp16, [1, 20, 64, 1500]> mh_q_31_cast_fp16 = reshape(shape = var_2945, x = query_31_cast_fp16)[name = string("mh_q_31_cast_fp16")];
+            fp16 var_2947_to_fp16 = const()[name = string("op_2947_to_fp16"), val = fp16(0x1p-3)];
+            tensor<fp16, [1, 20, 64, 1500]> var_2948_cast_fp16 = mul(x = mh_q_31_cast_fp16, y = var_2947_to_fp16)[name = string("op_2948_cast_fp16")];
+            tensor<int32, [4]> var_2949 = const()[name = string("op_2949"), val = tensor<int32, [4]>([1, 20, 64, -1])];
+            tensor<fp16, [1, 20, 64, 1500]> var_2950_cast_fp16 = reshape(shape = var_2949, x = key_31_cast_fp16)[name = string("op_2950_cast_fp16")];
+            bool mh_w_31_transpose_x_0 = const()[name = string("mh_w_31_transpose_x_0"), val = bool(true)];
+            bool mh_w_31_transpose_y_0 = const()[name = string("mh_w_31_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, 1500, 1500]> mh_w_31_cast_fp16 = matmul(transpose_x = mh_w_31_transpose_x_0, transpose_y = mh_w_31_transpose_y_0, x = var_2948_cast_fp16, y = var_2950_cast_fp16)[name = string("mh_w_31_cast_fp16")];
+            tensor<fp16, [1, 20, 1500, 1500]> var_2953_cast_fp16 = softmax(axis = var_2864, x = mh_w_31_cast_fp16)[name = string("op_2953_cast_fp16")];
+            tensor<int32, [4]> var_2954 = const()[name = string("op_2954"), val = tensor<int32, [4]>([1, 20, 64, -1])];
+            tensor<fp16, [1, 20, 64, 1500]> var_2955_cast_fp16 = reshape(shape = var_2954, x = value_31_cast_fp16)[name = string("op_2955_cast_fp16")];
+            bool attn_31_transpose_x_0 = const()[name = string("attn_31_transpose_x_0"), val = bool(false)];
+            bool attn_31_transpose_y_0 = const()[name = string("attn_31_transpose_y_0"), val = bool(true)];
+            tensor<fp16, [1, 20, 64, 1500]> attn_31_cast_fp16 = matmul(transpose_x = attn_31_transpose_x_0, transpose_y = attn_31_transpose_y_0, x = var_2955_cast_fp16, y = var_2953_cast_fp16)[name = string("attn_31_cast_fp16")];
+            tensor<int32, [4]> var_2958 = const()[name = string("op_2958"), val = tensor<int32, [4]>([1, 1280, 1, -1])];
+            tensor<fp16, [1, 1280, 1, 1500]> input_121_cast_fp16 = reshape(shape = var_2958, x = attn_31_cast_fp16)[name = string("input_121_cast_fp16")];
+            string var_2968_pad_type_0 = const()[name = string("op_2968_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2968_strides_0 = const()[name = string("op_2968_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2968_pad_0 = const()[name = string("op_2968_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2968_dilations_0 = const()[name = string("op_2968_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2968_groups_0 = const()[name = string("op_2968_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_15_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(211791104))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(212610368))))[name = string("layers_15_self_attn_o_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1280]> layers_15_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_15_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(212610496)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_2968_cast_fp16 = conv(bias = layers_15_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_2968_dilations_0, groups = var_2968_groups_0, pad = var_2968_pad_0, pad_type = var_2968_pad_type_0, strides = var_2968_strides_0, weight = layers_15_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_121_cast_fp16)[name = string("op_2968_cast_fp16")];
+            string var_2974_pad_type_0 = const()[name = string("op_2974_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2974_strides_0 = const()[name = string("op_2974_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2974_pad_0 = const()[name = string("op_2974_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2974_dilations_0 = const()[name = string("op_2974_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2974_groups_0 = const()[name = string("op_2974_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_15_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(212633664))), nonzero_data = tensor<fp16, [10240]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(212613120))))[name = string("layers_15_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_2974_cast_fp16 = conv(dilations = var_2974_dilations_0, groups = var_2974_groups_0, pad = var_2974_pad_0, pad_type = var_2974_pad_type_0, strides = var_2974_strides_0, weight = layers_15_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_121_cast_fp16)[name = string("op_2974_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> obj_63_cast_fp16 = add(x = var_2968_cast_fp16, y = var_2974_cast_fp16)[name = string("obj_63_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_63_cast_fp16 = add(x = inputs_61_cast_fp16, y = obj_63_cast_fp16)[name = string("inputs_63_cast_fp16")];
+            tensor<int32, [1]> out_63_axes_0 = const()[name = string("out_63_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_2985_to_fp16 = const()[name = string("op_2985_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> out_63_cast_fp16 = layer_norm(axes = out_63_axes_0, epsilon = var_2985_to_fp16, x = inputs_63_cast_fp16)[name = string("out_63_cast_fp16")];
+            tensor<fp16, [1280]> input_123_gamma_0_to_fp16 = const()[name = string("input_123_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(212838528)))];
+            tensor<fp16, [1280]> input_123_beta_0_to_fp16 = const()[name = string("input_123_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(212841152)))];
+            fp16 input_123_epsilon_0_to_fp16 = const()[name = string("input_123_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_123_cast_fp16 = batch_norm(beta = input_123_beta_0_to_fp16, epsilon = input_123_epsilon_0_to_fp16, gamma = input_123_gamma_0_to_fp16, mean = var_105_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_63_cast_fp16)[name = string("input_123_cast_fp16")];
+            string var_3003_pad_type_0 = const()[name = string("op_3003_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_3003_strides_0 = const()[name = string("op_3003_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3003_pad_0 = const()[name = string("op_3003_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3003_dilations_0 = const()[name = string("op_3003_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_3003_groups_0 = const()[name = string("op_3003_groups_0"), val = int32(1)];
+            tensor<fp16, [5120, 1280, 1, 1]> layers_15_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [5120, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(212843776))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(216120640))))[name = string("layers_15_fc1_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [5120]> layers_15_fc1_inlier_module_bias_to_fp16 = const()[name = string("layers_15_fc1_inlier_module_bias_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(216120768)))];
+            tensor<fp16, [1, 5120, 1, 1500]> var_3003_cast_fp16 = conv(bias = layers_15_fc1_inlier_module_bias_to_fp16, dilations = var_3003_dilations_0, groups = var_3003_groups_0, pad = var_3003_pad_0, pad_type = var_3003_pad_type_0, strides = var_3003_strides_0, weight = layers_15_fc1_inlier_module_weight_to_fp16_palettized, x = input_123_cast_fp16)[name = string("op_3003_cast_fp16")];
+            string var_3009_pad_type_0 = const()[name = string("op_3009_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_3009_strides_0 = const()[name = string("op_3009_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3009_pad_0 = const()[name = string("op_3009_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3009_dilations_0 = const()[name = string("op_3009_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_3009_groups_0 = const()[name = string("op_3009_groups_0"), val = int32(1)];
+            tensor<fp16, [5120, 1280, 1, 1]> layers_15_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [5120, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(216176896))), nonzero_data = tensor<fp16, [22855]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(216131072))))[name = string("layers_15_fc1_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 5120, 1, 1500]> var_3009_cast_fp16 = conv(dilations = var_3009_dilations_0, groups = var_3009_groups_0, pad = var_3009_pad_0, pad_type = var_3009_pad_type_0, strides = var_3009_strides_0, weight = layers_15_fc1_outlier_module_weight_to_fp16_sparsified, x = input_123_cast_fp16)[name = string("op_3009_cast_fp16")];
+            tensor<fp16, [1, 5120, 1, 1500]> input_125_cast_fp16 = add(x = var_3003_cast_fp16, y = var_3009_cast_fp16)[name = string("input_125_cast_fp16")];
+            string input_127_mode_0 = const()[name = string("input_127_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 5120, 1, 1500]> input_127_cast_fp16 = gelu(mode = input_127_mode_0, x = input_125_cast_fp16)[name = string("input_127_cast_fp16")];
+            string var_3020_pad_type_0 = const()[name = string("op_3020_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_3020_strides_0 = const()[name = string("op_3020_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3020_pad_0 = const()[name = string("op_3020_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3020_dilations_0 = const()[name = string("op_3020_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_3020_groups_0 = const()[name = string("op_3020_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 5120, 1, 1]> layers_15_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 5120, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(216996160))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(220273024))))[name = string("layers_15_fc2_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1280]> layers_15_fc2_inlier_module_bias_to_fp16 = const()[name = string("layers_15_fc2_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(220273152)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_3020_cast_fp16 = conv(bias = layers_15_fc2_inlier_module_bias_to_fp16, dilations = var_3020_dilations_0, groups = var_3020_groups_0, pad = var_3020_pad_0, pad_type = var_3020_pad_type_0, strides = var_3020_strides_0, weight = layers_15_fc2_inlier_module_weight_to_fp16_palettized, x = input_127_cast_fp16)[name = string("op_3020_cast_fp16")];
+            string var_3026_pad_type_0 = const()[name = string("op_3026_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_3026_strides_0 = const()[name = string("op_3026_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3026_pad_0 = const()[name = string("op_3026_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3026_dilations_0 = const()[name = string("op_3026_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_3026_groups_0 = const()[name = string("op_3026_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 5120, 1, 1]> layers_15_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 5120, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(220449024))), nonzero_data = tensor<fp16, [86580]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(220275776))))[name = string("layers_15_fc2_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_3026_cast_fp16 = conv(dilations = var_3026_dilations_0, groups = var_3026_groups_0, pad = var_3026_pad_0, pad_type = var_3026_pad_type_0, strides = var_3026_strides_0, weight = layers_15_fc2_outlier_module_weight_to_fp16_sparsified, x = input_127_cast_fp16)[name = string("op_3026_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> hidden_states_35_cast_fp16 = add(x = var_3020_cast_fp16, y = var_3026_cast_fp16)[name = string("hidden_states_35_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_65_cast_fp16 = add(x = inputs_63_cast_fp16, y = hidden_states_35_cast_fp16)[name = string("inputs_65_cast_fp16")];
+            int32 var_3036 = const()[name = string("op_3036"), val = int32(3)];
+            tensor<int32, [1]> out_65_axes_0 = const()[name = string("out_65_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_3055_to_fp16 = const()[name = string("op_3055_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> out_65_cast_fp16 = layer_norm(axes = out_65_axes_0, epsilon = var_3055_to_fp16, x = inputs_65_cast_fp16)[name = string("out_65_cast_fp16")];
+            tensor<fp16, [1280]> obj_65_gamma_0_to_fp16 = const()[name = string("obj_65_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(221268288)))];
+            tensor<fp16, [1280]> obj_65_beta_0_to_fp16 = const()[name = string("obj_65_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(221270912)))];
+            fp16 obj_65_epsilon_0_to_fp16 = const()[name = string("obj_65_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> obj_65_cast_fp16 = batch_norm(beta = obj_65_beta_0_to_fp16, epsilon = obj_65_epsilon_0_to_fp16, gamma = obj_65_gamma_0_to_fp16, mean = var_105_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_65_cast_fp16)[name = string("obj_65_cast_fp16")];
+            string var_3077_pad_type_0 = const()[name = string("op_3077_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_3077_strides_0 = const()[name = string("op_3077_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3077_pad_0 = const()[name = string("op_3077_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3077_dilations_0 = const()[name = string("op_3077_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_3077_groups_0 = const()[name = string("op_3077_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_16_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(221273536))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(222092800))))[name = string("layers_16_self_attn_q_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1280]> layers_16_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_16_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(222092928)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_3077_cast_fp16 = conv(bias = layers_16_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_3077_dilations_0, groups = var_3077_groups_0, pad = var_3077_pad_0, pad_type = var_3077_pad_type_0, strides = var_3077_strides_0, weight = layers_16_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_65_cast_fp16)[name = string("op_3077_cast_fp16")];
+            string var_3083_pad_type_0 = const()[name = string("op_3083_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_3083_strides_0 = const()[name = string("op_3083_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3083_pad_0 = const()[name = string("op_3083_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3083_dilations_0 = const()[name = string("op_3083_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_3083_groups_0 = const()[name = string("op_3083_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_16_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(222164416))), nonzero_data = tensor<fp16, [34395]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(222095552))))[name = string("layers_16_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_3083_cast_fp16 = conv(dilations = var_3083_dilations_0, groups = var_3083_groups_0, pad = var_3083_pad_0, pad_type = var_3083_pad_type_0, strides = var_3083_strides_0, weight = layers_16_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_65_cast_fp16)[name = string("op_3083_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> query_33_cast_fp16 = add(x = var_3077_cast_fp16, y = var_3083_cast_fp16)[name = string("query_33_cast_fp16")];
+            string var_3092_pad_type_0 = const()[name = string("op_3092_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_3092_strides_0 = const()[name = string("op_3092_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3092_pad_0 = const()[name = string("op_3092_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3092_dilations_0 = const()[name = string("op_3092_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_3092_groups_0 = const()[name = string("op_3092_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_16_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(222369280))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(223188544))))[name = string("layers_16_self_attn_k_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_3092_cast_fp16 = conv(dilations = var_3092_dilations_0, groups = var_3092_groups_0, pad = var_3092_pad_0, pad_type = var_3092_pad_type_0, strides = var_3092_strides_0, weight = layers_16_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_65_cast_fp16)[name = string("op_3092_cast_fp16")];
+            string var_3098_pad_type_0 = const()[name = string("op_3098_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_3098_strides_0 = const()[name = string("op_3098_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3098_pad_0 = const()[name = string("op_3098_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3098_dilations_0 = const()[name = string("op_3098_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_3098_groups_0 = const()[name = string("op_3098_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_16_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(223217792))), nonzero_data = tensor<fp16, [14518]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(223188672))))[name = string("layers_16_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_3098_cast_fp16 = conv(dilations = var_3098_dilations_0, groups = var_3098_groups_0, pad = var_3098_pad_0, pad_type = var_3098_pad_type_0, strides = var_3098_strides_0, weight = layers_16_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_65_cast_fp16)[name = string("op_3098_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> key_33_cast_fp16 = add(x = var_3092_cast_fp16, y = var_3098_cast_fp16)[name = string("key_33_cast_fp16")];
+            string var_3108_pad_type_0 = const()[name = string("op_3108_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_3108_strides_0 = const()[name = string("op_3108_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3108_pad_0 = const()[name = string("op_3108_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3108_dilations_0 = const()[name = string("op_3108_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_3108_groups_0 = const()[name = string("op_3108_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_16_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(223422656))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(224241920))))[name = string("layers_16_self_attn_v_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1280]> layers_16_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_16_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(224242048)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_3108_cast_fp16 = conv(bias = layers_16_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_3108_dilations_0, groups = var_3108_groups_0, pad = var_3108_pad_0, pad_type = var_3108_pad_type_0, strides = var_3108_strides_0, weight = layers_16_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_65_cast_fp16)[name = string("op_3108_cast_fp16")];
+            string var_3114_pad_type_0 = const()[name = string("op_3114_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_3114_strides_0 = const()[name = string("op_3114_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3114_pad_0 = const()[name = string("op_3114_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3114_dilations_0 = const()[name = string("op_3114_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_3114_groups_0 = const()[name = string("op_3114_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_16_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(224261888))), nonzero_data = tensor<fp16, [8562]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(224244672))))[name = string("layers_16_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_3114_cast_fp16 = conv(dilations = var_3114_dilations_0, groups = var_3114_groups_0, pad = var_3114_pad_0, pad_type = var_3114_pad_type_0, strides = var_3114_strides_0, weight = layers_16_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_65_cast_fp16)[name = string("op_3114_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> value_33_cast_fp16 = add(x = var_3108_cast_fp16, y = var_3114_cast_fp16)[name = string("value_33_cast_fp16")];
+            tensor<int32, [4]> var_3117 = const()[name = string("op_3117"), val = tensor<int32, [4]>([1, 20, 64, -1])];
+            tensor<fp16, [1, 20, 64, 1500]> mh_q_33_cast_fp16 = reshape(shape = var_3117, x = query_33_cast_fp16)[name = string("mh_q_33_cast_fp16")];
+            fp16 var_3119_to_fp16 = const()[name = string("op_3119_to_fp16"), val = fp16(0x1p-3)];
+            tensor<fp16, [1, 20, 64, 1500]> var_3120_cast_fp16 = mul(x = mh_q_33_cast_fp16, y = var_3119_to_fp16)[name = string("op_3120_cast_fp16")];
+            tensor<int32, [4]> var_3121 = const()[name = string("op_3121"), val = tensor<int32, [4]>([1, 20, 64, -1])];
+            tensor<fp16, [1, 20, 64, 1500]> var_3122_cast_fp16 = reshape(shape = var_3121, x = key_33_cast_fp16)[name = string("op_3122_cast_fp16")];
+            bool mh_w_33_transpose_x_0 = const()[name = string("mh_w_33_transpose_x_0"), val = bool(true)];
+            bool mh_w_33_transpose_y_0 = const()[name = string("mh_w_33_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, 1500, 1500]> mh_w_33_cast_fp16 = matmul(transpose_x = mh_w_33_transpose_x_0, transpose_y = mh_w_33_transpose_y_0, x = var_3120_cast_fp16, y = var_3122_cast_fp16)[name = string("mh_w_33_cast_fp16")];
+            tensor<fp16, [1, 20, 1500, 1500]> var_3125_cast_fp16 = softmax(axis = var_3036, x = mh_w_33_cast_fp16)[name = string("op_3125_cast_fp16")];
+            tensor<int32, [4]> var_3126 = const()[name = string("op_3126"), val = tensor<int32, [4]>([1, 20, 64, -1])];
+            tensor<fp16, [1, 20, 64, 1500]> var_3127_cast_fp16 = reshape(shape = var_3126, x = value_33_cast_fp16)[name = string("op_3127_cast_fp16")];
+            bool attn_33_transpose_x_0 = const()[name = string("attn_33_transpose_x_0"), val = bool(false)];
+            bool attn_33_transpose_y_0 = const()[name = string("attn_33_transpose_y_0"), val = bool(true)];
+            tensor<fp16, [1, 20, 64, 1500]> attn_33_cast_fp16 = matmul(transpose_x = attn_33_transpose_x_0, transpose_y = attn_33_transpose_y_0, x = var_3127_cast_fp16, y = var_3125_cast_fp16)[name = string("attn_33_cast_fp16")];
+            tensor<int32, [4]> var_3130 = const()[name = string("op_3130"), val = tensor<int32, [4]>([1, 1280, 1, -1])];
+            tensor<fp16, [1, 1280, 1, 1500]> input_129_cast_fp16 = reshape(shape = var_3130, x = attn_33_cast_fp16)[name = string("input_129_cast_fp16")];
+            string var_3140_pad_type_0 = const()[name = string("op_3140_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_3140_strides_0 = const()[name = string("op_3140_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3140_pad_0 = const()[name = string("op_3140_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3140_dilations_0 = const()[name = string("op_3140_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_3140_groups_0 = const()[name = string("op_3140_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_16_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(224466752))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(225286016))))[name = string("layers_16_self_attn_o_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1280]> layers_16_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_16_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(225286144)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_3140_cast_fp16 = conv(bias = layers_16_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_3140_dilations_0, groups = var_3140_groups_0, pad = var_3140_pad_0, pad_type = var_3140_pad_type_0, strides = var_3140_strides_0, weight = layers_16_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_129_cast_fp16)[name = string("op_3140_cast_fp16")];
+            string var_3146_pad_type_0 = const()[name = string("op_3146_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_3146_strides_0 = const()[name = string("op_3146_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3146_pad_0 = const()[name = string("op_3146_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3146_dilations_0 = const()[name = string("op_3146_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_3146_groups_0 = const()[name = string("op_3146_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_16_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(225307136))), nonzero_data = tensor<fp16, [9123]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(225288768))))[name = string("layers_16_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_3146_cast_fp16 = conv(dilations = var_3146_dilations_0, groups = var_3146_groups_0, pad = var_3146_pad_0, pad_type = var_3146_pad_type_0, strides = var_3146_strides_0, weight = layers_16_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_129_cast_fp16)[name = string("op_3146_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> obj_67_cast_fp16 = add(x = var_3140_cast_fp16, y = var_3146_cast_fp16)[name = string("obj_67_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_67_cast_fp16 = add(x = inputs_65_cast_fp16, y = obj_67_cast_fp16)[name = string("inputs_67_cast_fp16")];
+            tensor<int32, [1]> out_67_axes_0 = const()[name = string("out_67_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_3157_to_fp16 = const()[name = string("op_3157_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> out_67_cast_fp16 = layer_norm(axes = out_67_axes_0, epsilon = var_3157_to_fp16, x = inputs_67_cast_fp16)[name = string("out_67_cast_fp16")];
+            tensor<fp16, [1280]> input_131_gamma_0_to_fp16 = const()[name = string("input_131_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(225512000)))];
+            tensor<fp16, [1280]> input_131_beta_0_to_fp16 = const()[name = string("input_131_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(225514624)))];
+            fp16 input_131_epsilon_0_to_fp16 = const()[name = string("input_131_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_131_cast_fp16 = batch_norm(beta = input_131_beta_0_to_fp16, epsilon = input_131_epsilon_0_to_fp16, gamma = input_131_gamma_0_to_fp16, mean = var_105_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_67_cast_fp16)[name = string("input_131_cast_fp16")];
+            string var_3175_pad_type_0 = const()[name = string("op_3175_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_3175_strides_0 = const()[name = string("op_3175_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3175_pad_0 = const()[name = string("op_3175_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3175_dilations_0 = const()[name = string("op_3175_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_3175_groups_0 = const()[name = string("op_3175_groups_0"), val = int32(1)];
+            tensor<fp16, [5120, 1280, 1, 1]> layers_16_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [5120, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(225517248))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(228794112))))[name = string("layers_16_fc1_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [5120]> layers_16_fc1_inlier_module_bias_to_fp16 = const()[name = string("layers_16_fc1_inlier_module_bias_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(228794240)))];
+            tensor<fp16, [1, 5120, 1, 1500]> var_3175_cast_fp16 = conv(bias = layers_16_fc1_inlier_module_bias_to_fp16, dilations = var_3175_dilations_0, groups = var_3175_groups_0, pad = var_3175_pad_0, pad_type = var_3175_pad_type_0, strides = var_3175_strides_0, weight = layers_16_fc1_inlier_module_weight_to_fp16_palettized, x = input_131_cast_fp16)[name = string("op_3175_cast_fp16")];
+            string var_3181_pad_type_0 = const()[name = string("op_3181_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_3181_strides_0 = const()[name = string("op_3181_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3181_pad_0 = const()[name = string("op_3181_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3181_dilations_0 = const()[name = string("op_3181_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_3181_groups_0 = const()[name = string("op_3181_groups_0"), val = int32(1)];
+            tensor<fp16, [5120, 1280, 1, 1]> layers_16_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [5120, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(228879488))), nonzero_data = tensor<fp16, [37435]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(228804544))))[name = string("layers_16_fc1_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 5120, 1, 1500]> var_3181_cast_fp16 = conv(dilations = var_3181_dilations_0, groups = var_3181_groups_0, pad = var_3181_pad_0, pad_type = var_3181_pad_type_0, strides = var_3181_strides_0, weight = layers_16_fc1_outlier_module_weight_to_fp16_sparsified, x = input_131_cast_fp16)[name = string("op_3181_cast_fp16")];
+            tensor<fp16, [1, 5120, 1, 1500]> input_133_cast_fp16 = add(x = var_3175_cast_fp16, y = var_3181_cast_fp16)[name = string("input_133_cast_fp16")];
+            string input_135_mode_0 = const()[name = string("input_135_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 5120, 1, 1500]> input_135_cast_fp16 = gelu(mode = input_135_mode_0, x = input_133_cast_fp16)[name = string("input_135_cast_fp16")];
+            string var_3192_pad_type_0 = const()[name = string("op_3192_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_3192_strides_0 = const()[name = string("op_3192_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3192_pad_0 = const()[name = string("op_3192_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3192_dilations_0 = const()[name = string("op_3192_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_3192_groups_0 = const()[name = string("op_3192_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 5120, 1, 1]> layers_16_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 5120, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(229698752))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(232975616))))[name = string("layers_16_fc2_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1280]> layers_16_fc2_inlier_module_bias_to_fp16 = const()[name = string("layers_16_fc2_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(232975744)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_3192_cast_fp16 = conv(bias = layers_16_fc2_inlier_module_bias_to_fp16, dilations = var_3192_dilations_0, groups = var_3192_groups_0, pad = var_3192_pad_0, pad_type = var_3192_pad_type_0, strides = var_3192_strides_0, weight = layers_16_fc2_inlier_module_weight_to_fp16_palettized, x = input_135_cast_fp16)[name = string("op_3192_cast_fp16")];
+            string var_3198_pad_type_0 = const()[name = string("op_3198_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_3198_strides_0 = const()[name = string("op_3198_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3198_pad_0 = const()[name = string("op_3198_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3198_dilations_0 = const()[name = string("op_3198_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_3198_groups_0 = const()[name = string("op_3198_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 5120, 1, 1]> layers_16_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 5120, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(233104448))), nonzero_data = tensor<fp16, [62994]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(232978368))))[name = string("layers_16_fc2_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_3198_cast_fp16 = conv(dilations = var_3198_dilations_0, groups = var_3198_groups_0, pad = var_3198_pad_0, pad_type = var_3198_pad_type_0, strides = var_3198_strides_0, weight = layers_16_fc2_outlier_module_weight_to_fp16_sparsified, x = input_135_cast_fp16)[name = string("op_3198_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> hidden_states_37_cast_fp16 = add(x = var_3192_cast_fp16, y = var_3198_cast_fp16)[name = string("hidden_states_37_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_69_cast_fp16 = add(x = inputs_67_cast_fp16, y = hidden_states_37_cast_fp16)[name = string("inputs_69_cast_fp16")];
+            int32 var_3208 = const()[name = string("op_3208"), val = int32(3)];
+            tensor<int32, [1]> out_69_axes_0 = const()[name = string("out_69_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_3227_to_fp16 = const()[name = string("op_3227_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> out_69_cast_fp16 = layer_norm(axes = out_69_axes_0, epsilon = var_3227_to_fp16, x = inputs_69_cast_fp16)[name = string("out_69_cast_fp16")];
+            tensor<fp16, [1280]> obj_69_gamma_0_to_fp16 = const()[name = string("obj_69_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(233923712)))];
+            tensor<fp16, [1280]> obj_69_beta_0_to_fp16 = const()[name = string("obj_69_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(233926336)))];
+            fp16 obj_69_epsilon_0_to_fp16 = const()[name = string("obj_69_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> obj_69_cast_fp16 = batch_norm(beta = obj_69_beta_0_to_fp16, epsilon = obj_69_epsilon_0_to_fp16, gamma = obj_69_gamma_0_to_fp16, mean = var_105_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_69_cast_fp16)[name = string("obj_69_cast_fp16")];
+            string var_3249_pad_type_0 = const()[name = string("op_3249_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_3249_strides_0 = const()[name = string("op_3249_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3249_pad_0 = const()[name = string("op_3249_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3249_dilations_0 = const()[name = string("op_3249_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_3249_groups_0 = const()[name = string("op_3249_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_17_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(233928960))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(234748224))))[name = string("layers_17_self_attn_q_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1280]> layers_17_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_17_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(234748352)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_3249_cast_fp16 = conv(bias = layers_17_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_3249_dilations_0, groups = var_3249_groups_0, pad = var_3249_pad_0, pad_type = var_3249_pad_type_0, strides = var_3249_strides_0, weight = layers_17_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_69_cast_fp16)[name = string("op_3249_cast_fp16")];
+            string var_3255_pad_type_0 = const()[name = string("op_3255_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_3255_strides_0 = const()[name = string("op_3255_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3255_pad_0 = const()[name = string("op_3255_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3255_dilations_0 = const()[name = string("op_3255_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_3255_groups_0 = const()[name = string("op_3255_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_17_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(234801152))), nonzero_data = tensor<fp16, [25054]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(234750976))))[name = string("layers_17_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_3255_cast_fp16 = conv(dilations = var_3255_dilations_0, groups = var_3255_groups_0, pad = var_3255_pad_0, pad_type = var_3255_pad_type_0, strides = var_3255_strides_0, weight = layers_17_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_69_cast_fp16)[name = string("op_3255_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> query_35_cast_fp16 = add(x = var_3249_cast_fp16, y = var_3255_cast_fp16)[name = string("query_35_cast_fp16")];
+            string var_3264_pad_type_0 = const()[name = string("op_3264_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_3264_strides_0 = const()[name = string("op_3264_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3264_pad_0 = const()[name = string("op_3264_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3264_dilations_0 = const()[name = string("op_3264_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_3264_groups_0 = const()[name = string("op_3264_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_17_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(235006016))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(235825280))))[name = string("layers_17_self_attn_k_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_3264_cast_fp16 = conv(dilations = var_3264_dilations_0, groups = var_3264_groups_0, pad = var_3264_pad_0, pad_type = var_3264_pad_type_0, strides = var_3264_strides_0, weight = layers_17_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_69_cast_fp16)[name = string("op_3264_cast_fp16")];
+            string var_3270_pad_type_0 = const()[name = string("op_3270_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_3270_strides_0 = const()[name = string("op_3270_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3270_pad_0 = const()[name = string("op_3270_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3270_dilations_0 = const()[name = string("op_3270_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_3270_groups_0 = const()[name = string("op_3270_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_17_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(235854656))), nonzero_data = tensor<fp16, [14566]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(235825408))))[name = string("layers_17_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_3270_cast_fp16 = conv(dilations = var_3270_dilations_0, groups = var_3270_groups_0, pad = var_3270_pad_0, pad_type = var_3270_pad_type_0, strides = var_3270_strides_0, weight = layers_17_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_69_cast_fp16)[name = string("op_3270_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> key_35_cast_fp16 = add(x = var_3264_cast_fp16, y = var_3270_cast_fp16)[name = string("key_35_cast_fp16")];
+            string var_3280_pad_type_0 = const()[name = string("op_3280_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_3280_strides_0 = const()[name = string("op_3280_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3280_pad_0 = const()[name = string("op_3280_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3280_dilations_0 = const()[name = string("op_3280_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_3280_groups_0 = const()[name = string("op_3280_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_17_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(236059520))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(236878784))))[name = string("layers_17_self_attn_v_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1280]> layers_17_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_17_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(236878912)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_3280_cast_fp16 = conv(bias = layers_17_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_3280_dilations_0, groups = var_3280_groups_0, pad = var_3280_pad_0, pad_type = var_3280_pad_type_0, strides = var_3280_strides_0, weight = layers_17_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_69_cast_fp16)[name = string("op_3280_cast_fp16")];
+            string var_3286_pad_type_0 = const()[name = string("op_3286_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_3286_strides_0 = const()[name = string("op_3286_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3286_pad_0 = const()[name = string("op_3286_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3286_dilations_0 = const()[name = string("op_3286_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_3286_groups_0 = const()[name = string("op_3286_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_17_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(236897920))), nonzero_data = tensor<fp16, [8130]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(236881536))))[name = string("layers_17_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_3286_cast_fp16 = conv(dilations = var_3286_dilations_0, groups = var_3286_groups_0, pad = var_3286_pad_0, pad_type = var_3286_pad_type_0, strides = var_3286_strides_0, weight = layers_17_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_69_cast_fp16)[name = string("op_3286_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> value_35_cast_fp16 = add(x = var_3280_cast_fp16, y = var_3286_cast_fp16)[name = string("value_35_cast_fp16")];
+            tensor<int32, [4]> var_3289 = const()[name = string("op_3289"), val = tensor<int32, [4]>([1, 20, 64, -1])];
+            tensor<fp16, [1, 20, 64, 1500]> mh_q_35_cast_fp16 = reshape(shape = var_3289, x = query_35_cast_fp16)[name = string("mh_q_35_cast_fp16")];
+            fp16 var_3291_to_fp16 = const()[name = string("op_3291_to_fp16"), val = fp16(0x1p-3)];
+            tensor<fp16, [1, 20, 64, 1500]> var_3292_cast_fp16 = mul(x = mh_q_35_cast_fp16, y = var_3291_to_fp16)[name = string("op_3292_cast_fp16")];
+            tensor<int32, [4]> var_3293 = const()[name = string("op_3293"), val = tensor<int32, [4]>([1, 20, 64, -1])];
+            tensor<fp16, [1, 20, 64, 1500]> var_3294_cast_fp16 = reshape(shape = var_3293, x = key_35_cast_fp16)[name = string("op_3294_cast_fp16")];
+            bool mh_w_35_transpose_x_0 = const()[name = string("mh_w_35_transpose_x_0"), val = bool(true)];
+            bool mh_w_35_transpose_y_0 = const()[name = string("mh_w_35_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, 1500, 1500]> mh_w_35_cast_fp16 = matmul(transpose_x = mh_w_35_transpose_x_0, transpose_y = mh_w_35_transpose_y_0, x = var_3292_cast_fp16, y = var_3294_cast_fp16)[name = string("mh_w_35_cast_fp16")];
+            tensor<fp16, [1, 20, 1500, 1500]> var_3297_cast_fp16 = softmax(axis = var_3208, x = mh_w_35_cast_fp16)[name = string("op_3297_cast_fp16")];
+            tensor<int32, [4]> var_3298 = const()[name = string("op_3298"), val = tensor<int32, [4]>([1, 20, 64, -1])];
+            tensor<fp16, [1, 20, 64, 1500]> var_3299_cast_fp16 = reshape(shape = var_3298, x = value_35_cast_fp16)[name = string("op_3299_cast_fp16")];
+            bool attn_35_transpose_x_0 = const()[name = string("attn_35_transpose_x_0"), val = bool(false)];
+            bool attn_35_transpose_y_0 = const()[name = string("attn_35_transpose_y_0"), val = bool(true)];
+            tensor<fp16, [1, 20, 64, 1500]> attn_35_cast_fp16 = matmul(transpose_x = attn_35_transpose_x_0, transpose_y = attn_35_transpose_y_0, x = var_3299_cast_fp16, y = var_3297_cast_fp16)[name = string("attn_35_cast_fp16")];
+            tensor<int32, [4]> var_3302 = const()[name = string("op_3302"), val = tensor<int32, [4]>([1, 1280, 1, -1])];
+            tensor<fp16, [1, 1280, 1, 1500]> input_137_cast_fp16 = reshape(shape = var_3302, x = attn_35_cast_fp16)[name = string("input_137_cast_fp16")];
+            string var_3312_pad_type_0 = const()[name = string("op_3312_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_3312_strides_0 = const()[name = string("op_3312_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3312_pad_0 = const()[name = string("op_3312_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3312_dilations_0 = const()[name = string("op_3312_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_3312_groups_0 = const()[name = string("op_3312_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_17_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(237102784))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(237922048))))[name = string("layers_17_self_attn_o_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1280]> layers_17_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_17_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(237922176)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_3312_cast_fp16 = conv(bias = layers_17_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_3312_dilations_0, groups = var_3312_groups_0, pad = var_3312_pad_0, pad_type = var_3312_pad_type_0, strides = var_3312_strides_0, weight = layers_17_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_137_cast_fp16)[name = string("op_3312_cast_fp16")];
+            string var_3318_pad_type_0 = const()[name = string("op_3318_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_3318_strides_0 = const()[name = string("op_3318_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3318_pad_0 = const()[name = string("op_3318_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3318_dilations_0 = const()[name = string("op_3318_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_3318_groups_0 = const()[name = string("op_3318_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_17_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(237940736))), nonzero_data = tensor<fp16, [7928]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(237924800))))[name = string("layers_17_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_3318_cast_fp16 = conv(dilations = var_3318_dilations_0, groups = var_3318_groups_0, pad = var_3318_pad_0, pad_type = var_3318_pad_type_0, strides = var_3318_strides_0, weight = layers_17_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_137_cast_fp16)[name = string("op_3318_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> obj_71_cast_fp16 = add(x = var_3312_cast_fp16, y = var_3318_cast_fp16)[name = string("obj_71_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_71_cast_fp16 = add(x = inputs_69_cast_fp16, y = obj_71_cast_fp16)[name = string("inputs_71_cast_fp16")];
+            tensor<int32, [1]> out_71_axes_0 = const()[name = string("out_71_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_3329_to_fp16 = const()[name = string("op_3329_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> out_71_cast_fp16 = layer_norm(axes = out_71_axes_0, epsilon = var_3329_to_fp16, x = inputs_71_cast_fp16)[name = string("out_71_cast_fp16")];
+            tensor<fp16, [1280]> input_139_gamma_0_to_fp16 = const()[name = string("input_139_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(238145600)))];
+            tensor<fp16, [1280]> input_139_beta_0_to_fp16 = const()[name = string("input_139_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(238148224)))];
+            fp16 input_139_epsilon_0_to_fp16 = const()[name = string("input_139_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_139_cast_fp16 = batch_norm(beta = input_139_beta_0_to_fp16, epsilon = input_139_epsilon_0_to_fp16, gamma = input_139_gamma_0_to_fp16, mean = var_105_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_71_cast_fp16)[name = string("input_139_cast_fp16")];
+            string var_3347_pad_type_0 = const()[name = string("op_3347_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_3347_strides_0 = const()[name = string("op_3347_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3347_pad_0 = const()[name = string("op_3347_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3347_dilations_0 = const()[name = string("op_3347_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_3347_groups_0 = const()[name = string("op_3347_groups_0"), val = int32(1)];
+            tensor<fp16, [5120, 1280, 1, 1]> layers_17_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [5120, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(238150848))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(241427712))))[name = string("layers_17_fc1_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [5120]> layers_17_fc1_inlier_module_bias_to_fp16 = const()[name = string("layers_17_fc1_inlier_module_bias_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(241427840)))];
+            tensor<fp16, [1, 5120, 1, 1500]> var_3347_cast_fp16 = conv(bias = layers_17_fc1_inlier_module_bias_to_fp16, dilations = var_3347_dilations_0, groups = var_3347_groups_0, pad = var_3347_pad_0, pad_type = var_3347_pad_type_0, strides = var_3347_strides_0, weight = layers_17_fc1_inlier_module_weight_to_fp16_palettized, x = input_139_cast_fp16)[name = string("op_3347_cast_fp16")];
+            string var_3353_pad_type_0 = const()[name = string("op_3353_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_3353_strides_0 = const()[name = string("op_3353_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3353_pad_0 = const()[name = string("op_3353_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3353_dilations_0 = const()[name = string("op_3353_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_3353_groups_0 = const()[name = string("op_3353_groups_0"), val = int32(1)];
+            tensor<fp16, [5120, 1280, 1, 1]> layers_17_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [5120, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(241516992))), nonzero_data = tensor<fp16, [39374]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(241438144))))[name = string("layers_17_fc1_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 5120, 1, 1500]> var_3353_cast_fp16 = conv(dilations = var_3353_dilations_0, groups = var_3353_groups_0, pad = var_3353_pad_0, pad_type = var_3353_pad_type_0, strides = var_3353_strides_0, weight = layers_17_fc1_outlier_module_weight_to_fp16_sparsified, x = input_139_cast_fp16)[name = string("op_3353_cast_fp16")];
+            tensor<fp16, [1, 5120, 1, 1500]> input_141_cast_fp16 = add(x = var_3347_cast_fp16, y = var_3353_cast_fp16)[name = string("input_141_cast_fp16")];
+            string input_143_mode_0 = const()[name = string("input_143_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 5120, 1, 1500]> input_143_cast_fp16 = gelu(mode = input_143_mode_0, x = input_141_cast_fp16)[name = string("input_143_cast_fp16")];
+            string var_3364_pad_type_0 = const()[name = string("op_3364_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_3364_strides_0 = const()[name = string("op_3364_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3364_pad_0 = const()[name = string("op_3364_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3364_dilations_0 = const()[name = string("op_3364_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_3364_groups_0 = const()[name = string("op_3364_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 5120, 1, 1]> layers_17_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 5120, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(242336256))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(245613120))))[name = string("layers_17_fc2_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1280]> layers_17_fc2_inlier_module_bias_to_fp16 = const()[name = string("layers_17_fc2_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(245613248)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_3364_cast_fp16 = conv(bias = layers_17_fc2_inlier_module_bias_to_fp16, dilations = var_3364_dilations_0, groups = var_3364_groups_0, pad = var_3364_pad_0, pad_type = var_3364_pad_type_0, strides = var_3364_strides_0, weight = layers_17_fc2_inlier_module_weight_to_fp16_palettized, x = input_143_cast_fp16)[name = string("op_3364_cast_fp16")];
+            string var_3370_pad_type_0 = const()[name = string("op_3370_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_3370_strides_0 = const()[name = string("op_3370_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3370_pad_0 = const()[name = string("op_3370_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3370_dilations_0 = const()[name = string("op_3370_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_3370_groups_0 = const()[name = string("op_3370_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 5120, 1, 1]> layers_17_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 5120, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(245717952))), nonzero_data = tensor<fp16, [51001]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(245615872))))[name = string("layers_17_fc2_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_3370_cast_fp16 = conv(dilations = var_3370_dilations_0, groups = var_3370_groups_0, pad = var_3370_pad_0, pad_type = var_3370_pad_type_0, strides = var_3370_strides_0, weight = layers_17_fc2_outlier_module_weight_to_fp16_sparsified, x = input_143_cast_fp16)[name = string("op_3370_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> hidden_states_39_cast_fp16 = add(x = var_3364_cast_fp16, y = var_3370_cast_fp16)[name = string("hidden_states_39_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_73_cast_fp16 = add(x = inputs_71_cast_fp16, y = hidden_states_39_cast_fp16)[name = string("inputs_73_cast_fp16")];
+            int32 var_3380 = const()[name = string("op_3380"), val = int32(3)];
+            tensor<int32, [1]> out_73_axes_0 = const()[name = string("out_73_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_3399_to_fp16 = const()[name = string("op_3399_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> out_73_cast_fp16 = layer_norm(axes = out_73_axes_0, epsilon = var_3399_to_fp16, x = inputs_73_cast_fp16)[name = string("out_73_cast_fp16")];
+            tensor<fp16, [1280]> obj_73_gamma_0_to_fp16 = const()[name = string("obj_73_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(246537216)))];
+            tensor<fp16, [1280]> obj_73_beta_0_to_fp16 = const()[name = string("obj_73_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(246539840)))];
+            fp16 obj_73_epsilon_0_to_fp16 = const()[name = string("obj_73_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> obj_73_cast_fp16 = batch_norm(beta = obj_73_beta_0_to_fp16, epsilon = obj_73_epsilon_0_to_fp16, gamma = obj_73_gamma_0_to_fp16, mean = var_105_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_73_cast_fp16)[name = string("obj_73_cast_fp16")];
+            string var_3421_pad_type_0 = const()[name = string("op_3421_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_3421_strides_0 = const()[name = string("op_3421_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3421_pad_0 = const()[name = string("op_3421_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3421_dilations_0 = const()[name = string("op_3421_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_3421_groups_0 = const()[name = string("op_3421_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_18_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(246542464))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(247361728))))[name = string("layers_18_self_attn_q_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1280]> layers_18_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_18_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(247361856)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_3421_cast_fp16 = conv(bias = layers_18_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_3421_dilations_0, groups = var_3421_groups_0, pad = var_3421_pad_0, pad_type = var_3421_pad_type_0, strides = var_3421_strides_0, weight = layers_18_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_73_cast_fp16)[name = string("op_3421_cast_fp16")];
+            string var_3427_pad_type_0 = const()[name = string("op_3427_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_3427_strides_0 = const()[name = string("op_3427_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3427_pad_0 = const()[name = string("op_3427_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3427_dilations_0 = const()[name = string("op_3427_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_3427_groups_0 = const()[name = string("op_3427_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_18_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(247417600))), nonzero_data = tensor<fp16, [26500]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(247364480))))[name = string("layers_18_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_3427_cast_fp16 = conv(dilations = var_3427_dilations_0, groups = var_3427_groups_0, pad = var_3427_pad_0, pad_type = var_3427_pad_type_0, strides = var_3427_strides_0, weight = layers_18_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_73_cast_fp16)[name = string("op_3427_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> query_37_cast_fp16 = add(x = var_3421_cast_fp16, y = var_3427_cast_fp16)[name = string("query_37_cast_fp16")];
+            string var_3436_pad_type_0 = const()[name = string("op_3436_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_3436_strides_0 = const()[name = string("op_3436_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3436_pad_0 = const()[name = string("op_3436_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3436_dilations_0 = const()[name = string("op_3436_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_3436_groups_0 = const()[name = string("op_3436_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_18_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(247622464))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(248441728))))[name = string("layers_18_self_attn_k_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_3436_cast_fp16 = conv(dilations = var_3436_dilations_0, groups = var_3436_groups_0, pad = var_3436_pad_0, pad_type = var_3436_pad_type_0, strides = var_3436_strides_0, weight = layers_18_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_73_cast_fp16)[name = string("op_3436_cast_fp16")];
+            string var_3442_pad_type_0 = const()[name = string("op_3442_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_3442_strides_0 = const()[name = string("op_3442_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3442_pad_0 = const()[name = string("op_3442_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3442_dilations_0 = const()[name = string("op_3442_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_3442_groups_0 = const()[name = string("op_3442_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_18_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(248472512))), nonzero_data = tensor<fp16, [15282]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(248441856))))[name = string("layers_18_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_3442_cast_fp16 = conv(dilations = var_3442_dilations_0, groups = var_3442_groups_0, pad = var_3442_pad_0, pad_type = var_3442_pad_type_0, strides = var_3442_strides_0, weight = layers_18_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_73_cast_fp16)[name = string("op_3442_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> key_37_cast_fp16 = add(x = var_3436_cast_fp16, y = var_3442_cast_fp16)[name = string("key_37_cast_fp16")];
+            string var_3452_pad_type_0 = const()[name = string("op_3452_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_3452_strides_0 = const()[name = string("op_3452_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3452_pad_0 = const()[name = string("op_3452_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3452_dilations_0 = const()[name = string("op_3452_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_3452_groups_0 = const()[name = string("op_3452_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_18_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(248677376))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(249496640))))[name = string("layers_18_self_attn_v_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1280]> layers_18_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_18_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(249496768)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_3452_cast_fp16 = conv(bias = layers_18_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_3452_dilations_0, groups = var_3452_groups_0, pad = var_3452_pad_0, pad_type = var_3452_pad_type_0, strides = var_3452_strides_0, weight = layers_18_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_73_cast_fp16)[name = string("op_3452_cast_fp16")];
+            string var_3458_pad_type_0 = const()[name = string("op_3458_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_3458_strides_0 = const()[name = string("op_3458_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3458_pad_0 = const()[name = string("op_3458_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3458_dilations_0 = const()[name = string("op_3458_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_3458_groups_0 = const()[name = string("op_3458_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_18_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(249515520))), nonzero_data = tensor<fp16, [8030]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(249499392))))[name = string("layers_18_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_3458_cast_fp16 = conv(dilations = var_3458_dilations_0, groups = var_3458_groups_0, pad = var_3458_pad_0, pad_type = var_3458_pad_type_0, strides = var_3458_strides_0, weight = layers_18_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_73_cast_fp16)[name = string("op_3458_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> value_37_cast_fp16 = add(x = var_3452_cast_fp16, y = var_3458_cast_fp16)[name = string("value_37_cast_fp16")];
+            tensor<int32, [4]> var_3461 = const()[name = string("op_3461"), val = tensor<int32, [4]>([1, 20, 64, -1])];
+            tensor<fp16, [1, 20, 64, 1500]> mh_q_37_cast_fp16 = reshape(shape = var_3461, x = query_37_cast_fp16)[name = string("mh_q_37_cast_fp16")];
+            fp16 var_3463_to_fp16 = const()[name = string("op_3463_to_fp16"), val = fp16(0x1p-3)];
+            tensor<fp16, [1, 20, 64, 1500]> var_3464_cast_fp16 = mul(x = mh_q_37_cast_fp16, y = var_3463_to_fp16)[name = string("op_3464_cast_fp16")];
+            tensor<int32, [4]> var_3465 = const()[name = string("op_3465"), val = tensor<int32, [4]>([1, 20, 64, -1])];
+            tensor<fp16, [1, 20, 64, 1500]> var_3466_cast_fp16 = reshape(shape = var_3465, x = key_37_cast_fp16)[name = string("op_3466_cast_fp16")];
+            bool mh_w_37_transpose_x_0 = const()[name = string("mh_w_37_transpose_x_0"), val = bool(true)];
+            bool mh_w_37_transpose_y_0 = const()[name = string("mh_w_37_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, 1500, 1500]> mh_w_37_cast_fp16 = matmul(transpose_x = mh_w_37_transpose_x_0, transpose_y = mh_w_37_transpose_y_0, x = var_3464_cast_fp16, y = var_3466_cast_fp16)[name = string("mh_w_37_cast_fp16")];
+            tensor<fp16, [1, 20, 1500, 1500]> var_3469_cast_fp16 = softmax(axis = var_3380, x = mh_w_37_cast_fp16)[name = string("op_3469_cast_fp16")];
+            tensor<int32, [4]> var_3470 = const()[name = string("op_3470"), val = tensor<int32, [4]>([1, 20, 64, -1])];
+            tensor<fp16, [1, 20, 64, 1500]> var_3471_cast_fp16 = reshape(shape = var_3470, x = value_37_cast_fp16)[name = string("op_3471_cast_fp16")];
+            bool attn_37_transpose_x_0 = const()[name = string("attn_37_transpose_x_0"), val = bool(false)];
+            bool attn_37_transpose_y_0 = const()[name = string("attn_37_transpose_y_0"), val = bool(true)];
+            tensor<fp16, [1, 20, 64, 1500]> attn_37_cast_fp16 = matmul(transpose_x = attn_37_transpose_x_0, transpose_y = attn_37_transpose_y_0, x = var_3471_cast_fp16, y = var_3469_cast_fp16)[name = string("attn_37_cast_fp16")];
+            tensor<int32, [4]> var_3474 = const()[name = string("op_3474"), val = tensor<int32, [4]>([1, 1280, 1, -1])];
+            tensor<fp16, [1, 1280, 1, 1500]> input_145_cast_fp16 = reshape(shape = var_3474, x = attn_37_cast_fp16)[name = string("input_145_cast_fp16")];
+            string var_3484_pad_type_0 = const()[name = string("op_3484_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_3484_strides_0 = const()[name = string("op_3484_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3484_pad_0 = const()[name = string("op_3484_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3484_dilations_0 = const()[name = string("op_3484_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_3484_groups_0 = const()[name = string("op_3484_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_18_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(249720384))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(250539648))))[name = string("layers_18_self_attn_o_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1280]> layers_18_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_18_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(250539776)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_3484_cast_fp16 = conv(bias = layers_18_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_3484_dilations_0, groups = var_3484_groups_0, pad = var_3484_pad_0, pad_type = var_3484_pad_type_0, strides = var_3484_strides_0, weight = layers_18_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_145_cast_fp16)[name = string("op_3484_cast_fp16")];
+            string var_3490_pad_type_0 = const()[name = string("op_3490_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_3490_strides_0 = const()[name = string("op_3490_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3490_pad_0 = const()[name = string("op_3490_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3490_dilations_0 = const()[name = string("op_3490_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_3490_groups_0 = const()[name = string("op_3490_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_18_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(250558144))), nonzero_data = tensor<fp16, [7810]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(250542400))))[name = string("layers_18_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_3490_cast_fp16 = conv(dilations = var_3490_dilations_0, groups = var_3490_groups_0, pad = var_3490_pad_0, pad_type = var_3490_pad_type_0, strides = var_3490_strides_0, weight = layers_18_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_145_cast_fp16)[name = string("op_3490_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> obj_75_cast_fp16 = add(x = var_3484_cast_fp16, y = var_3490_cast_fp16)[name = string("obj_75_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_75_cast_fp16 = add(x = inputs_73_cast_fp16, y = obj_75_cast_fp16)[name = string("inputs_75_cast_fp16")];
+            tensor<int32, [1]> out_75_axes_0 = const()[name = string("out_75_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_3501_to_fp16 = const()[name = string("op_3501_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> out_75_cast_fp16 = layer_norm(axes = out_75_axes_0, epsilon = var_3501_to_fp16, x = inputs_75_cast_fp16)[name = string("out_75_cast_fp16")];
+            tensor<fp16, [1280]> input_147_gamma_0_to_fp16 = const()[name = string("input_147_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(250763008)))];
+            tensor<fp16, [1280]> input_147_beta_0_to_fp16 = const()[name = string("input_147_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(250765632)))];
+            fp16 input_147_epsilon_0_to_fp16 = const()[name = string("input_147_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_147_cast_fp16 = batch_norm(beta = input_147_beta_0_to_fp16, epsilon = input_147_epsilon_0_to_fp16, gamma = input_147_gamma_0_to_fp16, mean = var_105_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_75_cast_fp16)[name = string("input_147_cast_fp16")];
+            string var_3519_pad_type_0 = const()[name = string("op_3519_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_3519_strides_0 = const()[name = string("op_3519_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3519_pad_0 = const()[name = string("op_3519_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3519_dilations_0 = const()[name = string("op_3519_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_3519_groups_0 = const()[name = string("op_3519_groups_0"), val = int32(1)];
+            tensor<fp16, [5120, 1280, 1, 1]> layers_18_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [5120, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(250768256))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(254045120))))[name = string("layers_18_fc1_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [5120]> layers_18_fc1_inlier_module_bias_to_fp16 = const()[name = string("layers_18_fc1_inlier_module_bias_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(254045248)))];
+            tensor<fp16, [1, 5120, 1, 1500]> var_3519_cast_fp16 = conv(bias = layers_18_fc1_inlier_module_bias_to_fp16, dilations = var_3519_dilations_0, groups = var_3519_groups_0, pad = var_3519_pad_0, pad_type = var_3519_pad_type_0, strides = var_3519_strides_0, weight = layers_18_fc1_inlier_module_weight_to_fp16_palettized, x = input_147_cast_fp16)[name = string("op_3519_cast_fp16")];
+            string var_3525_pad_type_0 = const()[name = string("op_3525_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_3525_strides_0 = const()[name = string("op_3525_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3525_pad_0 = const()[name = string("op_3525_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3525_dilations_0 = const()[name = string("op_3525_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_3525_groups_0 = const()[name = string("op_3525_groups_0"), val = int32(1)];
+            tensor<fp16, [5120, 1280, 1, 1]> layers_18_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [5120, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(254130816))), nonzero_data = tensor<fp16, [37590]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(254055552))))[name = string("layers_18_fc1_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 5120, 1, 1500]> var_3525_cast_fp16 = conv(dilations = var_3525_dilations_0, groups = var_3525_groups_0, pad = var_3525_pad_0, pad_type = var_3525_pad_type_0, strides = var_3525_strides_0, weight = layers_18_fc1_outlier_module_weight_to_fp16_sparsified, x = input_147_cast_fp16)[name = string("op_3525_cast_fp16")];
+            tensor<fp16, [1, 5120, 1, 1500]> input_149_cast_fp16 = add(x = var_3519_cast_fp16, y = var_3525_cast_fp16)[name = string("input_149_cast_fp16")];
+            string input_151_mode_0 = const()[name = string("input_151_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 5120, 1, 1500]> input_151_cast_fp16 = gelu(mode = input_151_mode_0, x = input_149_cast_fp16)[name = string("input_151_cast_fp16")];
+            string var_3536_pad_type_0 = const()[name = string("op_3536_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_3536_strides_0 = const()[name = string("op_3536_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3536_pad_0 = const()[name = string("op_3536_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3536_dilations_0 = const()[name = string("op_3536_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_3536_groups_0 = const()[name = string("op_3536_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 5120, 1, 1]> layers_18_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 5120, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(254950080))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(258226944))))[name = string("layers_18_fc2_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1280]> layers_18_fc2_inlier_module_bias_to_fp16 = const()[name = string("layers_18_fc2_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(258227072)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_3536_cast_fp16 = conv(bias = layers_18_fc2_inlier_module_bias_to_fp16, dilations = var_3536_dilations_0, groups = var_3536_groups_0, pad = var_3536_pad_0, pad_type = var_3536_pad_type_0, strides = var_3536_strides_0, weight = layers_18_fc2_inlier_module_weight_to_fp16_palettized, x = input_151_cast_fp16)[name = string("op_3536_cast_fp16")];
+            string var_3542_pad_type_0 = const()[name = string("op_3542_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_3542_strides_0 = const()[name = string("op_3542_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3542_pad_0 = const()[name = string("op_3542_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3542_dilations_0 = const()[name = string("op_3542_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_3542_groups_0 = const()[name = string("op_3542_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 5120, 1, 1]> layers_18_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 5120, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(258318976))), nonzero_data = tensor<fp16, [44591]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(258229696))))[name = string("layers_18_fc2_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_3542_cast_fp16 = conv(dilations = var_3542_dilations_0, groups = var_3542_groups_0, pad = var_3542_pad_0, pad_type = var_3542_pad_type_0, strides = var_3542_strides_0, weight = layers_18_fc2_outlier_module_weight_to_fp16_sparsified, x = input_151_cast_fp16)[name = string("op_3542_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> hidden_states_41_cast_fp16 = add(x = var_3536_cast_fp16, y = var_3542_cast_fp16)[name = string("hidden_states_41_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_77_cast_fp16 = add(x = inputs_75_cast_fp16, y = hidden_states_41_cast_fp16)[name = string("inputs_77_cast_fp16")];
+            int32 var_3552 = const()[name = string("op_3552"), val = int32(3)];
+            tensor<int32, [1]> out_77_axes_0 = const()[name = string("out_77_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_3571_to_fp16 = const()[name = string("op_3571_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> out_77_cast_fp16 = layer_norm(axes = out_77_axes_0, epsilon = var_3571_to_fp16, x = inputs_77_cast_fp16)[name = string("out_77_cast_fp16")];
+            tensor<fp16, [1280]> obj_77_gamma_0_to_fp16 = const()[name = string("obj_77_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(259138240)))];
+            tensor<fp16, [1280]> obj_77_beta_0_to_fp16 = const()[name = string("obj_77_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(259140864)))];
+            fp16 obj_77_epsilon_0_to_fp16 = const()[name = string("obj_77_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> obj_77_cast_fp16 = batch_norm(beta = obj_77_beta_0_to_fp16, epsilon = obj_77_epsilon_0_to_fp16, gamma = obj_77_gamma_0_to_fp16, mean = var_105_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_77_cast_fp16)[name = string("obj_77_cast_fp16")];
+            string var_3593_pad_type_0 = const()[name = string("op_3593_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_3593_strides_0 = const()[name = string("op_3593_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3593_pad_0 = const()[name = string("op_3593_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3593_dilations_0 = const()[name = string("op_3593_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_3593_groups_0 = const()[name = string("op_3593_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_19_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(259143488))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(259962752))))[name = string("layers_19_self_attn_q_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1280]> layers_19_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_19_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(259962880)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_3593_cast_fp16 = conv(bias = layers_19_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_3593_dilations_0, groups = var_3593_groups_0, pad = var_3593_pad_0, pad_type = var_3593_pad_type_0, strides = var_3593_strides_0, weight = layers_19_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_77_cast_fp16)[name = string("op_3593_cast_fp16")];
+            string var_3599_pad_type_0 = const()[name = string("op_3599_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_3599_strides_0 = const()[name = string("op_3599_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3599_pad_0 = const()[name = string("op_3599_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3599_dilations_0 = const()[name = string("op_3599_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_3599_groups_0 = const()[name = string("op_3599_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_19_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(260020736))), nonzero_data = tensor<fp16, [27561]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(259965504))))[name = string("layers_19_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_3599_cast_fp16 = conv(dilations = var_3599_dilations_0, groups = var_3599_groups_0, pad = var_3599_pad_0, pad_type = var_3599_pad_type_0, strides = var_3599_strides_0, weight = layers_19_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_77_cast_fp16)[name = string("op_3599_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> query_39_cast_fp16 = add(x = var_3593_cast_fp16, y = var_3599_cast_fp16)[name = string("query_39_cast_fp16")];
+            string var_3608_pad_type_0 = const()[name = string("op_3608_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_3608_strides_0 = const()[name = string("op_3608_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3608_pad_0 = const()[name = string("op_3608_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3608_dilations_0 = const()[name = string("op_3608_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_3608_groups_0 = const()[name = string("op_3608_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_19_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(260225600))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(261044864))))[name = string("layers_19_self_attn_k_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_3608_cast_fp16 = conv(dilations = var_3608_dilations_0, groups = var_3608_groups_0, pad = var_3608_pad_0, pad_type = var_3608_pad_type_0, strides = var_3608_strides_0, weight = layers_19_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_77_cast_fp16)[name = string("op_3608_cast_fp16")];
+            string var_3614_pad_type_0 = const()[name = string("op_3614_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_3614_strides_0 = const()[name = string("op_3614_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3614_pad_0 = const()[name = string("op_3614_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3614_dilations_0 = const()[name = string("op_3614_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_3614_groups_0 = const()[name = string("op_3614_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_19_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(261079936))), nonzero_data = tensor<fp16, [17431]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(261044992))))[name = string("layers_19_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_3614_cast_fp16 = conv(dilations = var_3614_dilations_0, groups = var_3614_groups_0, pad = var_3614_pad_0, pad_type = var_3614_pad_type_0, strides = var_3614_strides_0, weight = layers_19_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_77_cast_fp16)[name = string("op_3614_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> key_39_cast_fp16 = add(x = var_3608_cast_fp16, y = var_3614_cast_fp16)[name = string("key_39_cast_fp16")];
+            string var_3624_pad_type_0 = const()[name = string("op_3624_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_3624_strides_0 = const()[name = string("op_3624_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3624_pad_0 = const()[name = string("op_3624_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3624_dilations_0 = const()[name = string("op_3624_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_3624_groups_0 = const()[name = string("op_3624_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_19_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(261284800))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(262104064))))[name = string("layers_19_self_attn_v_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1280]> layers_19_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_19_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(262104192)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_3624_cast_fp16 = conv(bias = layers_19_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_3624_dilations_0, groups = var_3624_groups_0, pad = var_3624_pad_0, pad_type = var_3624_pad_type_0, strides = var_3624_strides_0, weight = layers_19_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_77_cast_fp16)[name = string("op_3624_cast_fp16")];
+            string var_3630_pad_type_0 = const()[name = string("op_3630_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_3630_strides_0 = const()[name = string("op_3630_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3630_pad_0 = const()[name = string("op_3630_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3630_dilations_0 = const()[name = string("op_3630_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_3630_groups_0 = const()[name = string("op_3630_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_19_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(262121984))), nonzero_data = tensor<fp16, [7538]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(262106816))))[name = string("layers_19_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_3630_cast_fp16 = conv(dilations = var_3630_dilations_0, groups = var_3630_groups_0, pad = var_3630_pad_0, pad_type = var_3630_pad_type_0, strides = var_3630_strides_0, weight = layers_19_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_77_cast_fp16)[name = string("op_3630_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> value_39_cast_fp16 = add(x = var_3624_cast_fp16, y = var_3630_cast_fp16)[name = string("value_39_cast_fp16")];
+            tensor<int32, [4]> var_3633 = const()[name = string("op_3633"), val = tensor<int32, [4]>([1, 20, 64, -1])];
+            tensor<fp16, [1, 20, 64, 1500]> mh_q_39_cast_fp16 = reshape(shape = var_3633, x = query_39_cast_fp16)[name = string("mh_q_39_cast_fp16")];
+            fp16 var_3635_to_fp16 = const()[name = string("op_3635_to_fp16"), val = fp16(0x1p-3)];
+            tensor<fp16, [1, 20, 64, 1500]> var_3636_cast_fp16 = mul(x = mh_q_39_cast_fp16, y = var_3635_to_fp16)[name = string("op_3636_cast_fp16")];
+            tensor<int32, [4]> var_3637 = const()[name = string("op_3637"), val = tensor<int32, [4]>([1, 20, 64, -1])];
+            tensor<fp16, [1, 20, 64, 1500]> var_3638_cast_fp16 = reshape(shape = var_3637, x = key_39_cast_fp16)[name = string("op_3638_cast_fp16")];
+            bool mh_w_39_transpose_x_0 = const()[name = string("mh_w_39_transpose_x_0"), val = bool(true)];
+            bool mh_w_39_transpose_y_0 = const()[name = string("mh_w_39_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, 1500, 1500]> mh_w_39_cast_fp16 = matmul(transpose_x = mh_w_39_transpose_x_0, transpose_y = mh_w_39_transpose_y_0, x = var_3636_cast_fp16, y = var_3638_cast_fp16)[name = string("mh_w_39_cast_fp16")];
+            tensor<fp16, [1, 20, 1500, 1500]> var_3641_cast_fp16 = softmax(axis = var_3552, x = mh_w_39_cast_fp16)[name = string("op_3641_cast_fp16")];
+            tensor<int32, [4]> var_3642 = const()[name = string("op_3642"), val = tensor<int32, [4]>([1, 20, 64, -1])];
+            tensor<fp16, [1, 20, 64, 1500]> var_3643_cast_fp16 = reshape(shape = var_3642, x = value_39_cast_fp16)[name = string("op_3643_cast_fp16")];
+            bool attn_39_transpose_x_0 = const()[name = string("attn_39_transpose_x_0"), val = bool(false)];
+            bool attn_39_transpose_y_0 = const()[name = string("attn_39_transpose_y_0"), val = bool(true)];
+            tensor<fp16, [1, 20, 64, 1500]> attn_39_cast_fp16 = matmul(transpose_x = attn_39_transpose_x_0, transpose_y = attn_39_transpose_y_0, x = var_3643_cast_fp16, y = var_3641_cast_fp16)[name = string("attn_39_cast_fp16")];
+            tensor<int32, [4]> var_3646 = const()[name = string("op_3646"), val = tensor<int32, [4]>([1, 1280, 1, -1])];
+            tensor<fp16, [1, 1280, 1, 1500]> input_153_cast_fp16 = reshape(shape = var_3646, x = attn_39_cast_fp16)[name = string("input_153_cast_fp16")];
+            string var_3656_pad_type_0 = const()[name = string("op_3656_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_3656_strides_0 = const()[name = string("op_3656_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3656_pad_0 = const()[name = string("op_3656_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3656_dilations_0 = const()[name = string("op_3656_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_3656_groups_0 = const()[name = string("op_3656_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_19_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(262326848))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(263146112))))[name = string("layers_19_self_attn_o_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1280]> layers_19_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_19_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(263146240)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_3656_cast_fp16 = conv(bias = layers_19_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_3656_dilations_0, groups = var_3656_groups_0, pad = var_3656_pad_0, pad_type = var_3656_pad_type_0, strides = var_3656_strides_0, weight = layers_19_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_153_cast_fp16)[name = string("op_3656_cast_fp16")];
+            string var_3662_pad_type_0 = const()[name = string("op_3662_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_3662_strides_0 = const()[name = string("op_3662_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3662_pad_0 = const()[name = string("op_3662_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3662_dilations_0 = const()[name = string("op_3662_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_3662_groups_0 = const()[name = string("op_3662_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_19_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(263162496))), nonzero_data = tensor<fp16, [6783]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(263148864))))[name = string("layers_19_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_3662_cast_fp16 = conv(dilations = var_3662_dilations_0, groups = var_3662_groups_0, pad = var_3662_pad_0, pad_type = var_3662_pad_type_0, strides = var_3662_strides_0, weight = layers_19_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_153_cast_fp16)[name = string("op_3662_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> obj_79_cast_fp16 = add(x = var_3656_cast_fp16, y = var_3662_cast_fp16)[name = string("obj_79_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_79_cast_fp16 = add(x = inputs_77_cast_fp16, y = obj_79_cast_fp16)[name = string("inputs_79_cast_fp16")];
+            tensor<int32, [1]> out_79_axes_0 = const()[name = string("out_79_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_3673_to_fp16 = const()[name = string("op_3673_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> out_79_cast_fp16 = layer_norm(axes = out_79_axes_0, epsilon = var_3673_to_fp16, x = inputs_79_cast_fp16)[name = string("out_79_cast_fp16")];
+            tensor<fp16, [1280]> input_155_gamma_0_to_fp16 = const()[name = string("input_155_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(263367360)))];
+            tensor<fp16, [1280]> input_155_beta_0_to_fp16 = const()[name = string("input_155_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(263369984)))];
+            fp16 input_155_epsilon_0_to_fp16 = const()[name = string("input_155_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_155_cast_fp16 = batch_norm(beta = input_155_beta_0_to_fp16, epsilon = input_155_epsilon_0_to_fp16, gamma = input_155_gamma_0_to_fp16, mean = var_105_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_79_cast_fp16)[name = string("input_155_cast_fp16")];
+            string var_3691_pad_type_0 = const()[name = string("op_3691_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_3691_strides_0 = const()[name = string("op_3691_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3691_pad_0 = const()[name = string("op_3691_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3691_dilations_0 = const()[name = string("op_3691_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_3691_groups_0 = const()[name = string("op_3691_groups_0"), val = int32(1)];
+            tensor<fp16, [5120, 1280, 1, 1]> layers_19_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [5120, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(263372608))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(266649472))))[name = string("layers_19_fc1_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [5120]> layers_19_fc1_inlier_module_bias_to_fp16 = const()[name = string("layers_19_fc1_inlier_module_bias_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(266649600)))];
+            tensor<fp16, [1, 5120, 1, 1500]> var_3691_cast_fp16 = conv(bias = layers_19_fc1_inlier_module_bias_to_fp16, dilations = var_3691_dilations_0, groups = var_3691_groups_0, pad = var_3691_pad_0, pad_type = var_3691_pad_type_0, strides = var_3691_strides_0, weight = layers_19_fc1_inlier_module_weight_to_fp16_palettized, x = input_155_cast_fp16)[name = string("op_3691_cast_fp16")];
+            string var_3697_pad_type_0 = const()[name = string("op_3697_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_3697_strides_0 = const()[name = string("op_3697_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3697_pad_0 = const()[name = string("op_3697_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3697_dilations_0 = const()[name = string("op_3697_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_3697_groups_0 = const()[name = string("op_3697_groups_0"), val = int32(1)];
+            tensor<fp16, [5120, 1280, 1, 1]> layers_19_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [5120, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(266736192))), nonzero_data = tensor<fp16, [38107]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(266659904))))[name = string("layers_19_fc1_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 5120, 1, 1500]> var_3697_cast_fp16 = conv(dilations = var_3697_dilations_0, groups = var_3697_groups_0, pad = var_3697_pad_0, pad_type = var_3697_pad_type_0, strides = var_3697_strides_0, weight = layers_19_fc1_outlier_module_weight_to_fp16_sparsified, x = input_155_cast_fp16)[name = string("op_3697_cast_fp16")];
+            tensor<fp16, [1, 5120, 1, 1500]> input_157_cast_fp16 = add(x = var_3691_cast_fp16, y = var_3697_cast_fp16)[name = string("input_157_cast_fp16")];
+            string input_159_mode_0 = const()[name = string("input_159_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 5120, 1, 1500]> input_159_cast_fp16 = gelu(mode = input_159_mode_0, x = input_157_cast_fp16)[name = string("input_159_cast_fp16")];
+            string var_3708_pad_type_0 = const()[name = string("op_3708_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_3708_strides_0 = const()[name = string("op_3708_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3708_pad_0 = const()[name = string("op_3708_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3708_dilations_0 = const()[name = string("op_3708_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_3708_groups_0 = const()[name = string("op_3708_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 5120, 1, 1]> layers_19_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 5120, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(267555456))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(270832320))))[name = string("layers_19_fc2_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1280]> layers_19_fc2_inlier_module_bias_to_fp16 = const()[name = string("layers_19_fc2_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(270832448)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_3708_cast_fp16 = conv(bias = layers_19_fc2_inlier_module_bias_to_fp16, dilations = var_3708_dilations_0, groups = var_3708_groups_0, pad = var_3708_pad_0, pad_type = var_3708_pad_type_0, strides = var_3708_strides_0, weight = layers_19_fc2_inlier_module_weight_to_fp16_palettized, x = input_159_cast_fp16)[name = string("op_3708_cast_fp16")];
+            string var_3714_pad_type_0 = const()[name = string("op_3714_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_3714_strides_0 = const()[name = string("op_3714_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3714_pad_0 = const()[name = string("op_3714_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3714_dilations_0 = const()[name = string("op_3714_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_3714_groups_0 = const()[name = string("op_3714_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 5120, 1, 1]> layers_19_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 5120, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(270911040))), nonzero_data = tensor<fp16, [37935]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(270835072))))[name = string("layers_19_fc2_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_3714_cast_fp16 = conv(dilations = var_3714_dilations_0, groups = var_3714_groups_0, pad = var_3714_pad_0, pad_type = var_3714_pad_type_0, strides = var_3714_strides_0, weight = layers_19_fc2_outlier_module_weight_to_fp16_sparsified, x = input_159_cast_fp16)[name = string("op_3714_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> hidden_states_43_cast_fp16 = add(x = var_3708_cast_fp16, y = var_3714_cast_fp16)[name = string("hidden_states_43_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_81_cast_fp16 = add(x = inputs_79_cast_fp16, y = hidden_states_43_cast_fp16)[name = string("inputs_81_cast_fp16")];
+            int32 var_3724 = const()[name = string("op_3724"), val = int32(3)];
+            tensor<int32, [1]> out_81_axes_0 = const()[name = string("out_81_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_3743_to_fp16 = const()[name = string("op_3743_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> out_81_cast_fp16 = layer_norm(axes = out_81_axes_0, epsilon = var_3743_to_fp16, x = inputs_81_cast_fp16)[name = string("out_81_cast_fp16")];
+            tensor<fp16, [1280]> obj_81_gamma_0_to_fp16 = const()[name = string("obj_81_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(271730304)))];
+            tensor<fp16, [1280]> obj_81_beta_0_to_fp16 = const()[name = string("obj_81_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(271732928)))];
+            fp16 obj_81_epsilon_0_to_fp16 = const()[name = string("obj_81_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> obj_81_cast_fp16 = batch_norm(beta = obj_81_beta_0_to_fp16, epsilon = obj_81_epsilon_0_to_fp16, gamma = obj_81_gamma_0_to_fp16, mean = var_105_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_81_cast_fp16)[name = string("obj_81_cast_fp16")];
+            string var_3765_pad_type_0 = const()[name = string("op_3765_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_3765_strides_0 = const()[name = string("op_3765_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3765_pad_0 = const()[name = string("op_3765_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3765_dilations_0 = const()[name = string("op_3765_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_3765_groups_0 = const()[name = string("op_3765_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_20_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(271735552))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(272554816))))[name = string("layers_20_self_attn_q_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1280]> layers_20_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_20_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(272554944)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_3765_cast_fp16 = conv(bias = layers_20_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_3765_dilations_0, groups = var_3765_groups_0, pad = var_3765_pad_0, pad_type = var_3765_pad_type_0, strides = var_3765_strides_0, weight = layers_20_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_81_cast_fp16)[name = string("op_3765_cast_fp16")];
+            string var_3771_pad_type_0 = const()[name = string("op_3771_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_3771_strides_0 = const()[name = string("op_3771_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3771_pad_0 = const()[name = string("op_3771_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3771_dilations_0 = const()[name = string("op_3771_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_3771_groups_0 = const()[name = string("op_3771_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_20_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(272594048))), nonzero_data = tensor<fp16, [18178]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(272557568))))[name = string("layers_20_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_3771_cast_fp16 = conv(dilations = var_3771_dilations_0, groups = var_3771_groups_0, pad = var_3771_pad_0, pad_type = var_3771_pad_type_0, strides = var_3771_strides_0, weight = layers_20_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_81_cast_fp16)[name = string("op_3771_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> query_41_cast_fp16 = add(x = var_3765_cast_fp16, y = var_3771_cast_fp16)[name = string("query_41_cast_fp16")];
+            string var_3780_pad_type_0 = const()[name = string("op_3780_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_3780_strides_0 = const()[name = string("op_3780_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3780_pad_0 = const()[name = string("op_3780_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3780_dilations_0 = const()[name = string("op_3780_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_3780_groups_0 = const()[name = string("op_3780_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_20_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(272798912))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(273618176))))[name = string("layers_20_self_attn_k_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_3780_cast_fp16 = conv(dilations = var_3780_dilations_0, groups = var_3780_groups_0, pad = var_3780_pad_0, pad_type = var_3780_pad_type_0, strides = var_3780_strides_0, weight = layers_20_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_81_cast_fp16)[name = string("op_3780_cast_fp16")];
+            string var_3786_pad_type_0 = const()[name = string("op_3786_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_3786_strides_0 = const()[name = string("op_3786_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3786_pad_0 = const()[name = string("op_3786_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3786_dilations_0 = const()[name = string("op_3786_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_3786_groups_0 = const()[name = string("op_3786_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_20_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(273644416))), nonzero_data = tensor<fp16, [13001]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(273618304))))[name = string("layers_20_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_3786_cast_fp16 = conv(dilations = var_3786_dilations_0, groups = var_3786_groups_0, pad = var_3786_pad_0, pad_type = var_3786_pad_type_0, strides = var_3786_strides_0, weight = layers_20_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_81_cast_fp16)[name = string("op_3786_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> key_41_cast_fp16 = add(x = var_3780_cast_fp16, y = var_3786_cast_fp16)[name = string("key_41_cast_fp16")];
+            string var_3796_pad_type_0 = const()[name = string("op_3796_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_3796_strides_0 = const()[name = string("op_3796_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3796_pad_0 = const()[name = string("op_3796_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3796_dilations_0 = const()[name = string("op_3796_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_3796_groups_0 = const()[name = string("op_3796_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_20_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(273849280))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(274668544))))[name = string("layers_20_self_attn_v_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1280]> layers_20_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_20_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(274668672)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_3796_cast_fp16 = conv(bias = layers_20_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_3796_dilations_0, groups = var_3796_groups_0, pad = var_3796_pad_0, pad_type = var_3796_pad_type_0, strides = var_3796_strides_0, weight = layers_20_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_81_cast_fp16)[name = string("op_3796_cast_fp16")];
+            string var_3802_pad_type_0 = const()[name = string("op_3802_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_3802_strides_0 = const()[name = string("op_3802_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3802_pad_0 = const()[name = string("op_3802_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3802_dilations_0 = const()[name = string("op_3802_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_3802_groups_0 = const()[name = string("op_3802_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_20_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(274685312))), nonzero_data = tensor<fp16, [6955]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(274671296))))[name = string("layers_20_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_3802_cast_fp16 = conv(dilations = var_3802_dilations_0, groups = var_3802_groups_0, pad = var_3802_pad_0, pad_type = var_3802_pad_type_0, strides = var_3802_strides_0, weight = layers_20_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_81_cast_fp16)[name = string("op_3802_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> value_41_cast_fp16 = add(x = var_3796_cast_fp16, y = var_3802_cast_fp16)[name = string("value_41_cast_fp16")];
+            tensor<int32, [4]> var_3805 = const()[name = string("op_3805"), val = tensor<int32, [4]>([1, 20, 64, -1])];
+            tensor<fp16, [1, 20, 64, 1500]> mh_q_41_cast_fp16 = reshape(shape = var_3805, x = query_41_cast_fp16)[name = string("mh_q_41_cast_fp16")];
+            fp16 var_3807_to_fp16 = const()[name = string("op_3807_to_fp16"), val = fp16(0x1p-3)];
+            tensor<fp16, [1, 20, 64, 1500]> var_3808_cast_fp16 = mul(x = mh_q_41_cast_fp16, y = var_3807_to_fp16)[name = string("op_3808_cast_fp16")];
+            tensor<int32, [4]> var_3809 = const()[name = string("op_3809"), val = tensor<int32, [4]>([1, 20, 64, -1])];
+            tensor<fp16, [1, 20, 64, 1500]> var_3810_cast_fp16 = reshape(shape = var_3809, x = key_41_cast_fp16)[name = string("op_3810_cast_fp16")];
+            bool mh_w_41_transpose_x_0 = const()[name = string("mh_w_41_transpose_x_0"), val = bool(true)];
+            bool mh_w_41_transpose_y_0 = const()[name = string("mh_w_41_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, 1500, 1500]> mh_w_41_cast_fp16 = matmul(transpose_x = mh_w_41_transpose_x_0, transpose_y = mh_w_41_transpose_y_0, x = var_3808_cast_fp16, y = var_3810_cast_fp16)[name = string("mh_w_41_cast_fp16")];
+            tensor<fp16, [1, 20, 1500, 1500]> var_3813_cast_fp16 = softmax(axis = var_3724, x = mh_w_41_cast_fp16)[name = string("op_3813_cast_fp16")];
+            tensor<int32, [4]> var_3814 = const()[name = string("op_3814"), val = tensor<int32, [4]>([1, 20, 64, -1])];
+            tensor<fp16, [1, 20, 64, 1500]> var_3815_cast_fp16 = reshape(shape = var_3814, x = value_41_cast_fp16)[name = string("op_3815_cast_fp16")];
+            bool attn_41_transpose_x_0 = const()[name = string("attn_41_transpose_x_0"), val = bool(false)];
+            bool attn_41_transpose_y_0 = const()[name = string("attn_41_transpose_y_0"), val = bool(true)];
+            tensor<fp16, [1, 20, 64, 1500]> attn_41_cast_fp16 = matmul(transpose_x = attn_41_transpose_x_0, transpose_y = attn_41_transpose_y_0, x = var_3815_cast_fp16, y = var_3813_cast_fp16)[name = string("attn_41_cast_fp16")];
+            tensor<int32, [4]> var_3818 = const()[name = string("op_3818"), val = tensor<int32, [4]>([1, 1280, 1, -1])];
+            tensor<fp16, [1, 1280, 1, 1500]> input_161_cast_fp16 = reshape(shape = var_3818, x = attn_41_cast_fp16)[name = string("input_161_cast_fp16")];
+            string var_3828_pad_type_0 = const()[name = string("op_3828_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_3828_strides_0 = const()[name = string("op_3828_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3828_pad_0 = const()[name = string("op_3828_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3828_dilations_0 = const()[name = string("op_3828_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_3828_groups_0 = const()[name = string("op_3828_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_20_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(274890176))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(275709440))))[name = string("layers_20_self_attn_o_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1280]> layers_20_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_20_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(275709568)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_3828_cast_fp16 = conv(bias = layers_20_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_3828_dilations_0, groups = var_3828_groups_0, pad = var_3828_pad_0, pad_type = var_3828_pad_type_0, strides = var_3828_strides_0, weight = layers_20_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_161_cast_fp16)[name = string("op_3828_cast_fp16")];
+            string var_3834_pad_type_0 = const()[name = string("op_3834_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_3834_strides_0 = const()[name = string("op_3834_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3834_pad_0 = const()[name = string("op_3834_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3834_dilations_0 = const()[name = string("op_3834_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_3834_groups_0 = const()[name = string("op_3834_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_20_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(275726912))), nonzero_data = tensor<fp16, [7315]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(275712192))))[name = string("layers_20_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_3834_cast_fp16 = conv(dilations = var_3834_dilations_0, groups = var_3834_groups_0, pad = var_3834_pad_0, pad_type = var_3834_pad_type_0, strides = var_3834_strides_0, weight = layers_20_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_161_cast_fp16)[name = string("op_3834_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> obj_83_cast_fp16 = add(x = var_3828_cast_fp16, y = var_3834_cast_fp16)[name = string("obj_83_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_83_cast_fp16 = add(x = inputs_81_cast_fp16, y = obj_83_cast_fp16)[name = string("inputs_83_cast_fp16")];
+            tensor<int32, [1]> out_83_axes_0 = const()[name = string("out_83_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_3845_to_fp16 = const()[name = string("op_3845_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> out_83_cast_fp16 = layer_norm(axes = out_83_axes_0, epsilon = var_3845_to_fp16, x = inputs_83_cast_fp16)[name = string("out_83_cast_fp16")];
+            tensor<fp16, [1280]> input_163_gamma_0_to_fp16 = const()[name = string("input_163_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(275931776)))];
+            tensor<fp16, [1280]> input_163_beta_0_to_fp16 = const()[name = string("input_163_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(275934400)))];
+            fp16 input_163_epsilon_0_to_fp16 = const()[name = string("input_163_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_163_cast_fp16 = batch_norm(beta = input_163_beta_0_to_fp16, epsilon = input_163_epsilon_0_to_fp16, gamma = input_163_gamma_0_to_fp16, mean = var_105_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_83_cast_fp16)[name = string("input_163_cast_fp16")];
+            string var_3863_pad_type_0 = const()[name = string("op_3863_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_3863_strides_0 = const()[name = string("op_3863_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3863_pad_0 = const()[name = string("op_3863_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3863_dilations_0 = const()[name = string("op_3863_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_3863_groups_0 = const()[name = string("op_3863_groups_0"), val = int32(1)];
+            tensor<fp16, [5120, 1280, 1, 1]> layers_20_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [5120, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(275937024))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(279213888))))[name = string("layers_20_fc1_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [5120]> layers_20_fc1_inlier_module_bias_to_fp16 = const()[name = string("layers_20_fc1_inlier_module_bias_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(279214016)))];
+            tensor<fp16, [1, 5120, 1, 1500]> var_3863_cast_fp16 = conv(bias = layers_20_fc1_inlier_module_bias_to_fp16, dilations = var_3863_dilations_0, groups = var_3863_groups_0, pad = var_3863_pad_0, pad_type = var_3863_pad_type_0, strides = var_3863_strides_0, weight = layers_20_fc1_inlier_module_weight_to_fp16_palettized, x = input_163_cast_fp16)[name = string("op_3863_cast_fp16")];
+            string var_3869_pad_type_0 = const()[name = string("op_3869_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_3869_strides_0 = const()[name = string("op_3869_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3869_pad_0 = const()[name = string("op_3869_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3869_dilations_0 = const()[name = string("op_3869_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_3869_groups_0 = const()[name = string("op_3869_groups_0"), val = int32(1)];
+            tensor<fp16, [5120, 1280, 1, 1]> layers_20_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [5120, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(279311872))), nonzero_data = tensor<fp16, [43740]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(279224320))))[name = string("layers_20_fc1_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 5120, 1, 1500]> var_3869_cast_fp16 = conv(dilations = var_3869_dilations_0, groups = var_3869_groups_0, pad = var_3869_pad_0, pad_type = var_3869_pad_type_0, strides = var_3869_strides_0, weight = layers_20_fc1_outlier_module_weight_to_fp16_sparsified, x = input_163_cast_fp16)[name = string("op_3869_cast_fp16")];
+            tensor<fp16, [1, 5120, 1, 1500]> input_165_cast_fp16 = add(x = var_3863_cast_fp16, y = var_3869_cast_fp16)[name = string("input_165_cast_fp16")];
+            string input_167_mode_0 = const()[name = string("input_167_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 5120, 1, 1500]> input_167_cast_fp16 = gelu(mode = input_167_mode_0, x = input_165_cast_fp16)[name = string("input_167_cast_fp16")];
+            string var_3880_pad_type_0 = const()[name = string("op_3880_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_3880_strides_0 = const()[name = string("op_3880_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3880_pad_0 = const()[name = string("op_3880_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3880_dilations_0 = const()[name = string("op_3880_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_3880_groups_0 = const()[name = string("op_3880_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 5120, 1, 1]> layers_20_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 5120, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(280131136))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(283408000))))[name = string("layers_20_fc2_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1280]> layers_20_fc2_inlier_module_bias_to_fp16 = const()[name = string("layers_20_fc2_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(283408128)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_3880_cast_fp16 = conv(bias = layers_20_fc2_inlier_module_bias_to_fp16, dilations = var_3880_dilations_0, groups = var_3880_groups_0, pad = var_3880_pad_0, pad_type = var_3880_pad_type_0, strides = var_3880_strides_0, weight = layers_20_fc2_inlier_module_weight_to_fp16_palettized, x = input_167_cast_fp16)[name = string("op_3880_cast_fp16")];
+            string var_3886_pad_type_0 = const()[name = string("op_3886_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_3886_strides_0 = const()[name = string("op_3886_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3886_pad_0 = const()[name = string("op_3886_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3886_dilations_0 = const()[name = string("op_3886_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_3886_groups_0 = const()[name = string("op_3886_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 5120, 1, 1]> layers_20_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 5120, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(283466304))), nonzero_data = tensor<fp16, [27714]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(283410752))))[name = string("layers_20_fc2_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_3886_cast_fp16 = conv(dilations = var_3886_dilations_0, groups = var_3886_groups_0, pad = var_3886_pad_0, pad_type = var_3886_pad_type_0, strides = var_3886_strides_0, weight = layers_20_fc2_outlier_module_weight_to_fp16_sparsified, x = input_167_cast_fp16)[name = string("op_3886_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> hidden_states_45_cast_fp16 = add(x = var_3880_cast_fp16, y = var_3886_cast_fp16)[name = string("hidden_states_45_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_85_cast_fp16 = add(x = inputs_83_cast_fp16, y = hidden_states_45_cast_fp16)[name = string("inputs_85_cast_fp16")];
+            int32 var_3896 = const()[name = string("op_3896"), val = int32(3)];
+            tensor<int32, [1]> out_85_axes_0 = const()[name = string("out_85_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_3915_to_fp16 = const()[name = string("op_3915_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> out_85_cast_fp16 = layer_norm(axes = out_85_axes_0, epsilon = var_3915_to_fp16, x = inputs_85_cast_fp16)[name = string("out_85_cast_fp16")];
+            tensor<fp16, [1280]> obj_85_gamma_0_to_fp16 = const()[name = string("obj_85_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(284285568)))];
+            tensor<fp16, [1280]> obj_85_beta_0_to_fp16 = const()[name = string("obj_85_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(284288192)))];
+            fp16 obj_85_epsilon_0_to_fp16 = const()[name = string("obj_85_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> obj_85_cast_fp16 = batch_norm(beta = obj_85_beta_0_to_fp16, epsilon = obj_85_epsilon_0_to_fp16, gamma = obj_85_gamma_0_to_fp16, mean = var_105_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_85_cast_fp16)[name = string("obj_85_cast_fp16")];
+            string var_3937_pad_type_0 = const()[name = string("op_3937_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_3937_strides_0 = const()[name = string("op_3937_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3937_pad_0 = const()[name = string("op_3937_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3937_dilations_0 = const()[name = string("op_3937_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_3937_groups_0 = const()[name = string("op_3937_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_21_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(284290816))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(285110080))))[name = string("layers_21_self_attn_q_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1280]> layers_21_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_21_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(285110208)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_3937_cast_fp16 = conv(bias = layers_21_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_3937_dilations_0, groups = var_3937_groups_0, pad = var_3937_pad_0, pad_type = var_3937_pad_type_0, strides = var_3937_strides_0, weight = layers_21_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_85_cast_fp16)[name = string("op_3937_cast_fp16")];
+            string var_3943_pad_type_0 = const()[name = string("op_3943_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_3943_strides_0 = const()[name = string("op_3943_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3943_pad_0 = const()[name = string("op_3943_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3943_dilations_0 = const()[name = string("op_3943_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_3943_groups_0 = const()[name = string("op_3943_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_21_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(285140544))), nonzero_data = tensor<fp16, [13818]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(285112832))))[name = string("layers_21_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_3943_cast_fp16 = conv(dilations = var_3943_dilations_0, groups = var_3943_groups_0, pad = var_3943_pad_0, pad_type = var_3943_pad_type_0, strides = var_3943_strides_0, weight = layers_21_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_85_cast_fp16)[name = string("op_3943_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> query_43_cast_fp16 = add(x = var_3937_cast_fp16, y = var_3943_cast_fp16)[name = string("query_43_cast_fp16")];
+            string var_3952_pad_type_0 = const()[name = string("op_3952_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_3952_strides_0 = const()[name = string("op_3952_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3952_pad_0 = const()[name = string("op_3952_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3952_dilations_0 = const()[name = string("op_3952_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_3952_groups_0 = const()[name = string("op_3952_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_21_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(285345408))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(286164672))))[name = string("layers_21_self_attn_k_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_3952_cast_fp16 = conv(dilations = var_3952_dilations_0, groups = var_3952_groups_0, pad = var_3952_pad_0, pad_type = var_3952_pad_type_0, strides = var_3952_strides_0, weight = layers_21_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_85_cast_fp16)[name = string("op_3952_cast_fp16")];
+            string var_3958_pad_type_0 = const()[name = string("op_3958_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_3958_strides_0 = const()[name = string("op_3958_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3958_pad_0 = const()[name = string("op_3958_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3958_dilations_0 = const()[name = string("op_3958_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_3958_groups_0 = const()[name = string("op_3958_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_21_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(286193792))), nonzero_data = tensor<fp16, [14444]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(286164800))))[name = string("layers_21_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_3958_cast_fp16 = conv(dilations = var_3958_dilations_0, groups = var_3958_groups_0, pad = var_3958_pad_0, pad_type = var_3958_pad_type_0, strides = var_3958_strides_0, weight = layers_21_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_85_cast_fp16)[name = string("op_3958_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> key_43_cast_fp16 = add(x = var_3952_cast_fp16, y = var_3958_cast_fp16)[name = string("key_43_cast_fp16")];
+            string var_3968_pad_type_0 = const()[name = string("op_3968_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_3968_strides_0 = const()[name = string("op_3968_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3968_pad_0 = const()[name = string("op_3968_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3968_dilations_0 = const()[name = string("op_3968_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_3968_groups_0 = const()[name = string("op_3968_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_21_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(286398656))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(287217920))))[name = string("layers_21_self_attn_v_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1280]> layers_21_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_21_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(287218048)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_3968_cast_fp16 = conv(bias = layers_21_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_3968_dilations_0, groups = var_3968_groups_0, pad = var_3968_pad_0, pad_type = var_3968_pad_type_0, strides = var_3968_strides_0, weight = layers_21_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_85_cast_fp16)[name = string("op_3968_cast_fp16")];
+            string var_3974_pad_type_0 = const()[name = string("op_3974_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_3974_strides_0 = const()[name = string("op_3974_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3974_pad_0 = const()[name = string("op_3974_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3974_dilations_0 = const()[name = string("op_3974_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_3974_groups_0 = const()[name = string("op_3974_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_21_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(287233472))), nonzero_data = tensor<fp16, [6347]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(287220672))))[name = string("layers_21_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_3974_cast_fp16 = conv(dilations = var_3974_dilations_0, groups = var_3974_groups_0, pad = var_3974_pad_0, pad_type = var_3974_pad_type_0, strides = var_3974_strides_0, weight = layers_21_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_85_cast_fp16)[name = string("op_3974_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> value_43_cast_fp16 = add(x = var_3968_cast_fp16, y = var_3974_cast_fp16)[name = string("value_43_cast_fp16")];
+            tensor<int32, [4]> var_3977 = const()[name = string("op_3977"), val = tensor<int32, [4]>([1, 20, 64, -1])];
+            tensor<fp16, [1, 20, 64, 1500]> mh_q_43_cast_fp16 = reshape(shape = var_3977, x = query_43_cast_fp16)[name = string("mh_q_43_cast_fp16")];
+            fp16 var_3979_to_fp16 = const()[name = string("op_3979_to_fp16"), val = fp16(0x1p-3)];
+            tensor<fp16, [1, 20, 64, 1500]> var_3980_cast_fp16 = mul(x = mh_q_43_cast_fp16, y = var_3979_to_fp16)[name = string("op_3980_cast_fp16")];
+            tensor<int32, [4]> var_3981 = const()[name = string("op_3981"), val = tensor<int32, [4]>([1, 20, 64, -1])];
+            tensor<fp16, [1, 20, 64, 1500]> var_3982_cast_fp16 = reshape(shape = var_3981, x = key_43_cast_fp16)[name = string("op_3982_cast_fp16")];
+            bool mh_w_43_transpose_x_0 = const()[name = string("mh_w_43_transpose_x_0"), val = bool(true)];
+            bool mh_w_43_transpose_y_0 = const()[name = string("mh_w_43_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, 1500, 1500]> mh_w_43_cast_fp16 = matmul(transpose_x = mh_w_43_transpose_x_0, transpose_y = mh_w_43_transpose_y_0, x = var_3980_cast_fp16, y = var_3982_cast_fp16)[name = string("mh_w_43_cast_fp16")];
+            tensor<fp16, [1, 20, 1500, 1500]> var_3985_cast_fp16 = softmax(axis = var_3896, x = mh_w_43_cast_fp16)[name = string("op_3985_cast_fp16")];
+            tensor<int32, [4]> var_3986 = const()[name = string("op_3986"), val = tensor<int32, [4]>([1, 20, 64, -1])];
+            tensor<fp16, [1, 20, 64, 1500]> var_3987_cast_fp16 = reshape(shape = var_3986, x = value_43_cast_fp16)[name = string("op_3987_cast_fp16")];
+            bool attn_43_transpose_x_0 = const()[name = string("attn_43_transpose_x_0"), val = bool(false)];
+            bool attn_43_transpose_y_0 = const()[name = string("attn_43_transpose_y_0"), val = bool(true)];
+            tensor<fp16, [1, 20, 64, 1500]> attn_43_cast_fp16 = matmul(transpose_x = attn_43_transpose_x_0, transpose_y = attn_43_transpose_y_0, x = var_3987_cast_fp16, y = var_3985_cast_fp16)[name = string("attn_43_cast_fp16")];
+            tensor<int32, [4]> var_3990 = const()[name = string("op_3990"), val = tensor<int32, [4]>([1, 1280, 1, -1])];
+            tensor<fp16, [1, 1280, 1, 1500]> input_169_cast_fp16 = reshape(shape = var_3990, x = attn_43_cast_fp16)[name = string("input_169_cast_fp16")];
+            string var_4000_pad_type_0 = const()[name = string("op_4000_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_4000_strides_0 = const()[name = string("op_4000_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_4000_pad_0 = const()[name = string("op_4000_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_4000_dilations_0 = const()[name = string("op_4000_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_4000_groups_0 = const()[name = string("op_4000_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_21_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(287438336))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(288257600))))[name = string("layers_21_self_attn_o_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1280]> layers_21_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_21_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(288257728)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_4000_cast_fp16 = conv(bias = layers_21_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_4000_dilations_0, groups = var_4000_groups_0, pad = var_4000_pad_0, pad_type = var_4000_pad_type_0, strides = var_4000_strides_0, weight = layers_21_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_169_cast_fp16)[name = string("op_4000_cast_fp16")];
+            string var_4006_pad_type_0 = const()[name = string("op_4006_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_4006_strides_0 = const()[name = string("op_4006_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_4006_pad_0 = const()[name = string("op_4006_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_4006_dilations_0 = const()[name = string("op_4006_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_4006_groups_0 = const()[name = string("op_4006_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_21_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(288272320))), nonzero_data = tensor<fp16, [5922]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(288260352))))[name = string("layers_21_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_4006_cast_fp16 = conv(dilations = var_4006_dilations_0, groups = var_4006_groups_0, pad = var_4006_pad_0, pad_type = var_4006_pad_type_0, strides = var_4006_strides_0, weight = layers_21_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_169_cast_fp16)[name = string("op_4006_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> obj_87_cast_fp16 = add(x = var_4000_cast_fp16, y = var_4006_cast_fp16)[name = string("obj_87_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_87_cast_fp16 = add(x = inputs_85_cast_fp16, y = obj_87_cast_fp16)[name = string("inputs_87_cast_fp16")];
+            tensor<int32, [1]> out_87_axes_0 = const()[name = string("out_87_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_4017_to_fp16 = const()[name = string("op_4017_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> out_87_cast_fp16 = layer_norm(axes = out_87_axes_0, epsilon = var_4017_to_fp16, x = inputs_87_cast_fp16)[name = string("out_87_cast_fp16")];
+            tensor<fp16, [1280]> input_171_gamma_0_to_fp16 = const()[name = string("input_171_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(288477184)))];
+            tensor<fp16, [1280]> input_171_beta_0_to_fp16 = const()[name = string("input_171_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(288479808)))];
+            fp16 input_171_epsilon_0_to_fp16 = const()[name = string("input_171_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_171_cast_fp16 = batch_norm(beta = input_171_beta_0_to_fp16, epsilon = input_171_epsilon_0_to_fp16, gamma = input_171_gamma_0_to_fp16, mean = var_105_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_87_cast_fp16)[name = string("input_171_cast_fp16")];
+            string var_4035_pad_type_0 = const()[name = string("op_4035_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_4035_strides_0 = const()[name = string("op_4035_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_4035_pad_0 = const()[name = string("op_4035_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_4035_dilations_0 = const()[name = string("op_4035_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_4035_groups_0 = const()[name = string("op_4035_groups_0"), val = int32(1)];
+            tensor<fp16, [5120, 1280, 1, 1]> layers_21_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [5120, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(288482432))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(291759296))))[name = string("layers_21_fc1_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [5120]> layers_21_fc1_inlier_module_bias_to_fp16 = const()[name = string("layers_21_fc1_inlier_module_bias_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(291759424)))];
+            tensor<fp16, [1, 5120, 1, 1500]> var_4035_cast_fp16 = conv(bias = layers_21_fc1_inlier_module_bias_to_fp16, dilations = var_4035_dilations_0, groups = var_4035_groups_0, pad = var_4035_pad_0, pad_type = var_4035_pad_type_0, strides = var_4035_strides_0, weight = layers_21_fc1_inlier_module_weight_to_fp16_palettized, x = input_171_cast_fp16)[name = string("op_4035_cast_fp16")];
+            string var_4041_pad_type_0 = const()[name = string("op_4041_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_4041_strides_0 = const()[name = string("op_4041_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_4041_pad_0 = const()[name = string("op_4041_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_4041_dilations_0 = const()[name = string("op_4041_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_4041_groups_0 = const()[name = string("op_4041_groups_0"), val = int32(1)];
+            tensor<fp16, [5120, 1280, 1, 1]> layers_21_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [5120, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(291830912))), nonzero_data = tensor<fp16, [30547]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(291769728))))[name = string("layers_21_fc1_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 5120, 1, 1500]> var_4041_cast_fp16 = conv(dilations = var_4041_dilations_0, groups = var_4041_groups_0, pad = var_4041_pad_0, pad_type = var_4041_pad_type_0, strides = var_4041_strides_0, weight = layers_21_fc1_outlier_module_weight_to_fp16_sparsified, x = input_171_cast_fp16)[name = string("op_4041_cast_fp16")];
+            tensor<fp16, [1, 5120, 1, 1500]> input_173_cast_fp16 = add(x = var_4035_cast_fp16, y = var_4041_cast_fp16)[name = string("input_173_cast_fp16")];
+            string input_175_mode_0 = const()[name = string("input_175_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 5120, 1, 1500]> input_175_cast_fp16 = gelu(mode = input_175_mode_0, x = input_173_cast_fp16)[name = string("input_175_cast_fp16")];
+            string var_4052_pad_type_0 = const()[name = string("op_4052_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_4052_strides_0 = const()[name = string("op_4052_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_4052_pad_0 = const()[name = string("op_4052_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_4052_dilations_0 = const()[name = string("op_4052_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_4052_groups_0 = const()[name = string("op_4052_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 5120, 1, 1]> layers_21_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 5120, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(292650176))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(295927040))))[name = string("layers_21_fc2_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1280]> layers_21_fc2_inlier_module_bias_to_fp16 = const()[name = string("layers_21_fc2_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(295927168)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_4052_cast_fp16 = conv(bias = layers_21_fc2_inlier_module_bias_to_fp16, dilations = var_4052_dilations_0, groups = var_4052_groups_0, pad = var_4052_pad_0, pad_type = var_4052_pad_type_0, strides = var_4052_strides_0, weight = layers_21_fc2_inlier_module_weight_to_fp16_palettized, x = input_175_cast_fp16)[name = string("op_4052_cast_fp16")];
+            string var_4058_pad_type_0 = const()[name = string("op_4058_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_4058_strides_0 = const()[name = string("op_4058_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_4058_pad_0 = const()[name = string("op_4058_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_4058_dilations_0 = const()[name = string("op_4058_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_4058_groups_0 = const()[name = string("op_4058_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 5120, 1, 1]> layers_21_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 5120, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(295992512))), nonzero_data = tensor<fp16, [31326]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(295929792))))[name = string("layers_21_fc2_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_4058_cast_fp16 = conv(dilations = var_4058_dilations_0, groups = var_4058_groups_0, pad = var_4058_pad_0, pad_type = var_4058_pad_type_0, strides = var_4058_strides_0, weight = layers_21_fc2_outlier_module_weight_to_fp16_sparsified, x = input_175_cast_fp16)[name = string("op_4058_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> hidden_states_47_cast_fp16 = add(x = var_4052_cast_fp16, y = var_4058_cast_fp16)[name = string("hidden_states_47_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_89_cast_fp16 = add(x = inputs_87_cast_fp16, y = hidden_states_47_cast_fp16)[name = string("inputs_89_cast_fp16")];
+            int32 var_4068 = const()[name = string("op_4068"), val = int32(3)];
+            tensor<int32, [1]> out_89_axes_0 = const()[name = string("out_89_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_4087_to_fp16 = const()[name = string("op_4087_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> out_89_cast_fp16 = layer_norm(axes = out_89_axes_0, epsilon = var_4087_to_fp16, x = inputs_89_cast_fp16)[name = string("out_89_cast_fp16")];
+            tensor<fp16, [1280]> obj_89_gamma_0_to_fp16 = const()[name = string("obj_89_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(296811776)))];
+            tensor<fp16, [1280]> obj_89_beta_0_to_fp16 = const()[name = string("obj_89_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(296814400)))];
+            fp16 obj_89_epsilon_0_to_fp16 = const()[name = string("obj_89_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> obj_89_cast_fp16 = batch_norm(beta = obj_89_beta_0_to_fp16, epsilon = obj_89_epsilon_0_to_fp16, gamma = obj_89_gamma_0_to_fp16, mean = var_105_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_89_cast_fp16)[name = string("obj_89_cast_fp16")];
+            string var_4109_pad_type_0 = const()[name = string("op_4109_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_4109_strides_0 = const()[name = string("op_4109_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_4109_pad_0 = const()[name = string("op_4109_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_4109_dilations_0 = const()[name = string("op_4109_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_4109_groups_0 = const()[name = string("op_4109_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_22_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(296817024))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(297636288))))[name = string("layers_22_self_attn_q_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1280]> layers_22_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_22_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(297636416)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_4109_cast_fp16 = conv(bias = layers_22_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_4109_dilations_0, groups = var_4109_groups_0, pad = var_4109_pad_0, pad_type = var_4109_pad_type_0, strides = var_4109_strides_0, weight = layers_22_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_89_cast_fp16)[name = string("op_4109_cast_fp16")];
+            string var_4115_pad_type_0 = const()[name = string("op_4115_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_4115_strides_0 = const()[name = string("op_4115_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_4115_pad_0 = const()[name = string("op_4115_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_4115_dilations_0 = const()[name = string("op_4115_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_4115_groups_0 = const()[name = string("op_4115_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_22_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(297674560))), nonzero_data = tensor<fp16, [17724]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(297639040))))[name = string("layers_22_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_4115_cast_fp16 = conv(dilations = var_4115_dilations_0, groups = var_4115_groups_0, pad = var_4115_pad_0, pad_type = var_4115_pad_type_0, strides = var_4115_strides_0, weight = layers_22_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_89_cast_fp16)[name = string("op_4115_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> query_45_cast_fp16 = add(x = var_4109_cast_fp16, y = var_4115_cast_fp16)[name = string("query_45_cast_fp16")];
+            string var_4124_pad_type_0 = const()[name = string("op_4124_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_4124_strides_0 = const()[name = string("op_4124_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_4124_pad_0 = const()[name = string("op_4124_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_4124_dilations_0 = const()[name = string("op_4124_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_4124_groups_0 = const()[name = string("op_4124_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_22_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(297879424))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(298698688))))[name = string("layers_22_self_attn_k_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_4124_cast_fp16 = conv(dilations = var_4124_dilations_0, groups = var_4124_groups_0, pad = var_4124_pad_0, pad_type = var_4124_pad_type_0, strides = var_4124_strides_0, weight = layers_22_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_89_cast_fp16)[name = string("op_4124_cast_fp16")];
+            string var_4130_pad_type_0 = const()[name = string("op_4130_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_4130_strides_0 = const()[name = string("op_4130_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_4130_pad_0 = const()[name = string("op_4130_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_4130_dilations_0 = const()[name = string("op_4130_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_4130_groups_0 = const()[name = string("op_4130_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_22_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(298728768))), nonzero_data = tensor<fp16, [14937]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(298698816))))[name = string("layers_22_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_4130_cast_fp16 = conv(dilations = var_4130_dilations_0, groups = var_4130_groups_0, pad = var_4130_pad_0, pad_type = var_4130_pad_type_0, strides = var_4130_strides_0, weight = layers_22_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_89_cast_fp16)[name = string("op_4130_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> key_45_cast_fp16 = add(x = var_4124_cast_fp16, y = var_4130_cast_fp16)[name = string("key_45_cast_fp16")];
+            string var_4140_pad_type_0 = const()[name = string("op_4140_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_4140_strides_0 = const()[name = string("op_4140_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_4140_pad_0 = const()[name = string("op_4140_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_4140_dilations_0 = const()[name = string("op_4140_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_4140_groups_0 = const()[name = string("op_4140_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_22_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(298933632))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(299752896))))[name = string("layers_22_self_attn_v_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1280]> layers_22_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_22_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(299753024)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_4140_cast_fp16 = conv(bias = layers_22_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_4140_dilations_0, groups = var_4140_groups_0, pad = var_4140_pad_0, pad_type = var_4140_pad_type_0, strides = var_4140_strides_0, weight = layers_22_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_89_cast_fp16)[name = string("op_4140_cast_fp16")];
+            string var_4146_pad_type_0 = const()[name = string("op_4146_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_4146_strides_0 = const()[name = string("op_4146_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_4146_pad_0 = const()[name = string("op_4146_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_4146_dilations_0 = const()[name = string("op_4146_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_4146_groups_0 = const()[name = string("op_4146_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_22_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(299768064))), nonzero_data = tensor<fp16, [6148]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(299755648))))[name = string("layers_22_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_4146_cast_fp16 = conv(dilations = var_4146_dilations_0, groups = var_4146_groups_0, pad = var_4146_pad_0, pad_type = var_4146_pad_type_0, strides = var_4146_strides_0, weight = layers_22_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_89_cast_fp16)[name = string("op_4146_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> value_45_cast_fp16 = add(x = var_4140_cast_fp16, y = var_4146_cast_fp16)[name = string("value_45_cast_fp16")];
+            tensor<int32, [4]> var_4149 = const()[name = string("op_4149"), val = tensor<int32, [4]>([1, 20, 64, -1])];
+            tensor<fp16, [1, 20, 64, 1500]> mh_q_45_cast_fp16 = reshape(shape = var_4149, x = query_45_cast_fp16)[name = string("mh_q_45_cast_fp16")];
+            fp16 var_4151_to_fp16 = const()[name = string("op_4151_to_fp16"), val = fp16(0x1p-3)];
+            tensor<fp16, [1, 20, 64, 1500]> var_4152_cast_fp16 = mul(x = mh_q_45_cast_fp16, y = var_4151_to_fp16)[name = string("op_4152_cast_fp16")];
+            tensor<int32, [4]> var_4153 = const()[name = string("op_4153"), val = tensor<int32, [4]>([1, 20, 64, -1])];
+            tensor<fp16, [1, 20, 64, 1500]> var_4154_cast_fp16 = reshape(shape = var_4153, x = key_45_cast_fp16)[name = string("op_4154_cast_fp16")];
+            bool mh_w_45_transpose_x_0 = const()[name = string("mh_w_45_transpose_x_0"), val = bool(true)];
+            bool mh_w_45_transpose_y_0 = const()[name = string("mh_w_45_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, 1500, 1500]> mh_w_45_cast_fp16 = matmul(transpose_x = mh_w_45_transpose_x_0, transpose_y = mh_w_45_transpose_y_0, x = var_4152_cast_fp16, y = var_4154_cast_fp16)[name = string("mh_w_45_cast_fp16")];
+            tensor<fp16, [1, 20, 1500, 1500]> var_4157_cast_fp16 = softmax(axis = var_4068, x = mh_w_45_cast_fp16)[name = string("op_4157_cast_fp16")];
+            tensor<int32, [4]> var_4158 = const()[name = string("op_4158"), val = tensor<int32, [4]>([1, 20, 64, -1])];
+            tensor<fp16, [1, 20, 64, 1500]> var_4159_cast_fp16 = reshape(shape = var_4158, x = value_45_cast_fp16)[name = string("op_4159_cast_fp16")];
+            bool attn_45_transpose_x_0 = const()[name = string("attn_45_transpose_x_0"), val = bool(false)];
+            bool attn_45_transpose_y_0 = const()[name = string("attn_45_transpose_y_0"), val = bool(true)];
+            tensor<fp16, [1, 20, 64, 1500]> attn_45_cast_fp16 = matmul(transpose_x = attn_45_transpose_x_0, transpose_y = attn_45_transpose_y_0, x = var_4159_cast_fp16, y = var_4157_cast_fp16)[name = string("attn_45_cast_fp16")];
+            tensor<int32, [4]> var_4162 = const()[name = string("op_4162"), val = tensor<int32, [4]>([1, 1280, 1, -1])];
+            tensor<fp16, [1, 1280, 1, 1500]> input_177_cast_fp16 = reshape(shape = var_4162, x = attn_45_cast_fp16)[name = string("input_177_cast_fp16")];
+            string var_4172_pad_type_0 = const()[name = string("op_4172_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_4172_strides_0 = const()[name = string("op_4172_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_4172_pad_0 = const()[name = string("op_4172_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_4172_dilations_0 = const()[name = string("op_4172_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_4172_groups_0 = const()[name = string("op_4172_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_22_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(299972928))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(300792192))))[name = string("layers_22_self_attn_o_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1280]> layers_22_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_22_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(300792320)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_4172_cast_fp16 = conv(bias = layers_22_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_4172_dilations_0, groups = var_4172_groups_0, pad = var_4172_pad_0, pad_type = var_4172_pad_type_0, strides = var_4172_strides_0, weight = layers_22_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_177_cast_fp16)[name = string("op_4172_cast_fp16")];
+            string var_4178_pad_type_0 = const()[name = string("op_4178_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_4178_strides_0 = const()[name = string("op_4178_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_4178_pad_0 = const()[name = string("op_4178_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_4178_dilations_0 = const()[name = string("op_4178_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_4178_groups_0 = const()[name = string("op_4178_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_22_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(300807232))), nonzero_data = tensor<fp16, [6088]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(300794944))))[name = string("layers_22_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_4178_cast_fp16 = conv(dilations = var_4178_dilations_0, groups = var_4178_groups_0, pad = var_4178_pad_0, pad_type = var_4178_pad_type_0, strides = var_4178_strides_0, weight = layers_22_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_177_cast_fp16)[name = string("op_4178_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> obj_91_cast_fp16 = add(x = var_4172_cast_fp16, y = var_4178_cast_fp16)[name = string("obj_91_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_91_cast_fp16 = add(x = inputs_89_cast_fp16, y = obj_91_cast_fp16)[name = string("inputs_91_cast_fp16")];
+            tensor<int32, [1]> out_91_axes_0 = const()[name = string("out_91_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_4189_to_fp16 = const()[name = string("op_4189_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> out_91_cast_fp16 = layer_norm(axes = out_91_axes_0, epsilon = var_4189_to_fp16, x = inputs_91_cast_fp16)[name = string("out_91_cast_fp16")];
+            tensor<fp16, [1280]> input_179_gamma_0_to_fp16 = const()[name = string("input_179_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(301012096)))];
+            tensor<fp16, [1280]> input_179_beta_0_to_fp16 = const()[name = string("input_179_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(301014720)))];
+            fp16 input_179_epsilon_0_to_fp16 = const()[name = string("input_179_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_179_cast_fp16 = batch_norm(beta = input_179_beta_0_to_fp16, epsilon = input_179_epsilon_0_to_fp16, gamma = input_179_gamma_0_to_fp16, mean = var_105_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_91_cast_fp16)[name = string("input_179_cast_fp16")];
+            string var_4207_pad_type_0 = const()[name = string("op_4207_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_4207_strides_0 = const()[name = string("op_4207_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_4207_pad_0 = const()[name = string("op_4207_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_4207_dilations_0 = const()[name = string("op_4207_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_4207_groups_0 = const()[name = string("op_4207_groups_0"), val = int32(1)];
+            tensor<fp16, [5120, 1280, 1, 1]> layers_22_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [5120, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(301017344))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(304294208))))[name = string("layers_22_fc1_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [5120]> layers_22_fc1_inlier_module_bias_to_fp16 = const()[name = string("layers_22_fc1_inlier_module_bias_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(304294336)))];
+            tensor<fp16, [1, 5120, 1, 1500]> var_4207_cast_fp16 = conv(bias = layers_22_fc1_inlier_module_bias_to_fp16, dilations = var_4207_dilations_0, groups = var_4207_groups_0, pad = var_4207_pad_0, pad_type = var_4207_pad_type_0, strides = var_4207_strides_0, weight = layers_22_fc1_inlier_module_weight_to_fp16_palettized, x = input_179_cast_fp16)[name = string("op_4207_cast_fp16")];
+            string var_4213_pad_type_0 = const()[name = string("op_4213_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_4213_strides_0 = const()[name = string("op_4213_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_4213_pad_0 = const()[name = string("op_4213_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_4213_dilations_0 = const()[name = string("op_4213_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_4213_groups_0 = const()[name = string("op_4213_groups_0"), val = int32(1)];
+            tensor<fp16, [5120, 1280, 1, 1]> layers_22_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [5120, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(304372736))), nonzero_data = tensor<fp16, [34004]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(304304640))))[name = string("layers_22_fc1_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 5120, 1, 1500]> var_4213_cast_fp16 = conv(dilations = var_4213_dilations_0, groups = var_4213_groups_0, pad = var_4213_pad_0, pad_type = var_4213_pad_type_0, strides = var_4213_strides_0, weight = layers_22_fc1_outlier_module_weight_to_fp16_sparsified, x = input_179_cast_fp16)[name = string("op_4213_cast_fp16")];
+            tensor<fp16, [1, 5120, 1, 1500]> input_181_cast_fp16 = add(x = var_4207_cast_fp16, y = var_4213_cast_fp16)[name = string("input_181_cast_fp16")];
+            string input_183_mode_0 = const()[name = string("input_183_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 5120, 1, 1500]> input_183_cast_fp16 = gelu(mode = input_183_mode_0, x = input_181_cast_fp16)[name = string("input_183_cast_fp16")];
+            string var_4224_pad_type_0 = const()[name = string("op_4224_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_4224_strides_0 = const()[name = string("op_4224_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_4224_pad_0 = const()[name = string("op_4224_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_4224_dilations_0 = const()[name = string("op_4224_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_4224_groups_0 = const()[name = string("op_4224_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 5120, 1, 1]> layers_22_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 5120, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(305192000))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(308468864))))[name = string("layers_22_fc2_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1280]> layers_22_fc2_inlier_module_bias_to_fp16 = const()[name = string("layers_22_fc2_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(308468992)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_4224_cast_fp16 = conv(bias = layers_22_fc2_inlier_module_bias_to_fp16, dilations = var_4224_dilations_0, groups = var_4224_groups_0, pad = var_4224_pad_0, pad_type = var_4224_pad_type_0, strides = var_4224_strides_0, weight = layers_22_fc2_inlier_module_weight_to_fp16_palettized, x = input_183_cast_fp16)[name = string("op_4224_cast_fp16")];
+            string var_4230_pad_type_0 = const()[name = string("op_4230_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_4230_strides_0 = const()[name = string("op_4230_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_4230_pad_0 = const()[name = string("op_4230_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_4230_dilations_0 = const()[name = string("op_4230_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_4230_groups_0 = const()[name = string("op_4230_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 5120, 1, 1]> layers_22_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 5120, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(308533376))), nonzero_data = tensor<fp16, [30830]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(308471616))))[name = string("layers_22_fc2_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_4230_cast_fp16 = conv(dilations = var_4230_dilations_0, groups = var_4230_groups_0, pad = var_4230_pad_0, pad_type = var_4230_pad_type_0, strides = var_4230_strides_0, weight = layers_22_fc2_outlier_module_weight_to_fp16_sparsified, x = input_183_cast_fp16)[name = string("op_4230_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> hidden_states_49_cast_fp16 = add(x = var_4224_cast_fp16, y = var_4230_cast_fp16)[name = string("hidden_states_49_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_93_cast_fp16 = add(x = inputs_91_cast_fp16, y = hidden_states_49_cast_fp16)[name = string("inputs_93_cast_fp16")];
+            int32 var_4240 = const()[name = string("op_4240"), val = int32(3)];
+            tensor<int32, [1]> out_93_axes_0 = const()[name = string("out_93_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_4259_to_fp16 = const()[name = string("op_4259_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> out_93_cast_fp16 = layer_norm(axes = out_93_axes_0, epsilon = var_4259_to_fp16, x = inputs_93_cast_fp16)[name = string("out_93_cast_fp16")];
+            tensor<fp16, [1280]> obj_93_gamma_0_to_fp16 = const()[name = string("obj_93_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(309352640)))];
+            tensor<fp16, [1280]> obj_93_beta_0_to_fp16 = const()[name = string("obj_93_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(309355264)))];
+            fp16 obj_93_epsilon_0_to_fp16 = const()[name = string("obj_93_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> obj_93_cast_fp16 = batch_norm(beta = obj_93_beta_0_to_fp16, epsilon = obj_93_epsilon_0_to_fp16, gamma = obj_93_gamma_0_to_fp16, mean = var_105_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_93_cast_fp16)[name = string("obj_93_cast_fp16")];
+            string var_4281_pad_type_0 = const()[name = string("op_4281_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_4281_strides_0 = const()[name = string("op_4281_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_4281_pad_0 = const()[name = string("op_4281_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_4281_dilations_0 = const()[name = string("op_4281_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_4281_groups_0 = const()[name = string("op_4281_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_23_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(309357888))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(310177152))))[name = string("layers_23_self_attn_q_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1280]> layers_23_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_23_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(310177280)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_4281_cast_fp16 = conv(bias = layers_23_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_4281_dilations_0, groups = var_4281_groups_0, pad = var_4281_pad_0, pad_type = var_4281_pad_type_0, strides = var_4281_strides_0, weight = layers_23_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_93_cast_fp16)[name = string("op_4281_cast_fp16")];
+            string var_4287_pad_type_0 = const()[name = string("op_4287_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_4287_strides_0 = const()[name = string("op_4287_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_4287_pad_0 = const()[name = string("op_4287_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_4287_dilations_0 = const()[name = string("op_4287_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_4287_groups_0 = const()[name = string("op_4287_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_23_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(310208448))), nonzero_data = tensor<fp16, [14215]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(310179904))))[name = string("layers_23_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_4287_cast_fp16 = conv(dilations = var_4287_dilations_0, groups = var_4287_groups_0, pad = var_4287_pad_0, pad_type = var_4287_pad_type_0, strides = var_4287_strides_0, weight = layers_23_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_93_cast_fp16)[name = string("op_4287_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> query_47_cast_fp16 = add(x = var_4281_cast_fp16, y = var_4287_cast_fp16)[name = string("query_47_cast_fp16")];
+            string var_4296_pad_type_0 = const()[name = string("op_4296_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_4296_strides_0 = const()[name = string("op_4296_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_4296_pad_0 = const()[name = string("op_4296_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_4296_dilations_0 = const()[name = string("op_4296_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_4296_groups_0 = const()[name = string("op_4296_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_23_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(310413312))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(311232576))))[name = string("layers_23_self_attn_k_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_4296_cast_fp16 = conv(dilations = var_4296_dilations_0, groups = var_4296_groups_0, pad = var_4296_pad_0, pad_type = var_4296_pad_type_0, strides = var_4296_strides_0, weight = layers_23_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_93_cast_fp16)[name = string("op_4296_cast_fp16")];
+            string var_4302_pad_type_0 = const()[name = string("op_4302_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_4302_strides_0 = const()[name = string("op_4302_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_4302_pad_0 = const()[name = string("op_4302_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_4302_dilations_0 = const()[name = string("op_4302_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_4302_groups_0 = const()[name = string("op_4302_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_23_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(311257088))), nonzero_data = tensor<fp16, [12160]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(311232704))))[name = string("layers_23_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_4302_cast_fp16 = conv(dilations = var_4302_dilations_0, groups = var_4302_groups_0, pad = var_4302_pad_0, pad_type = var_4302_pad_type_0, strides = var_4302_strides_0, weight = layers_23_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_93_cast_fp16)[name = string("op_4302_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> key_47_cast_fp16 = add(x = var_4296_cast_fp16, y = var_4302_cast_fp16)[name = string("key_47_cast_fp16")];
+            string var_4312_pad_type_0 = const()[name = string("op_4312_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_4312_strides_0 = const()[name = string("op_4312_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_4312_pad_0 = const()[name = string("op_4312_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_4312_dilations_0 = const()[name = string("op_4312_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_4312_groups_0 = const()[name = string("op_4312_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_23_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(311461952))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(312281216))))[name = string("layers_23_self_attn_v_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1280]> layers_23_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_23_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(312281344)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_4312_cast_fp16 = conv(bias = layers_23_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_4312_dilations_0, groups = var_4312_groups_0, pad = var_4312_pad_0, pad_type = var_4312_pad_type_0, strides = var_4312_strides_0, weight = layers_23_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_93_cast_fp16)[name = string("op_4312_cast_fp16")];
+            string var_4318_pad_type_0 = const()[name = string("op_4318_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_4318_strides_0 = const()[name = string("op_4318_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_4318_pad_0 = const()[name = string("op_4318_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_4318_dilations_0 = const()[name = string("op_4318_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_4318_groups_0 = const()[name = string("op_4318_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_23_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(312297152))), nonzero_data = tensor<fp16, [6559]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(312283968))))[name = string("layers_23_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_4318_cast_fp16 = conv(dilations = var_4318_dilations_0, groups = var_4318_groups_0, pad = var_4318_pad_0, pad_type = var_4318_pad_type_0, strides = var_4318_strides_0, weight = layers_23_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_93_cast_fp16)[name = string("op_4318_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> value_47_cast_fp16 = add(x = var_4312_cast_fp16, y = var_4318_cast_fp16)[name = string("value_47_cast_fp16")];
+            tensor<int32, [4]> var_4321 = const()[name = string("op_4321"), val = tensor<int32, [4]>([1, 20, 64, -1])];
+            tensor<fp16, [1, 20, 64, 1500]> mh_q_47_cast_fp16 = reshape(shape = var_4321, x = query_47_cast_fp16)[name = string("mh_q_47_cast_fp16")];
+            fp16 var_4323_to_fp16 = const()[name = string("op_4323_to_fp16"), val = fp16(0x1p-3)];
+            tensor<fp16, [1, 20, 64, 1500]> var_4324_cast_fp16 = mul(x = mh_q_47_cast_fp16, y = var_4323_to_fp16)[name = string("op_4324_cast_fp16")];
+            tensor<int32, [4]> var_4325 = const()[name = string("op_4325"), val = tensor<int32, [4]>([1, 20, 64, -1])];
+            tensor<fp16, [1, 20, 64, 1500]> var_4326_cast_fp16 = reshape(shape = var_4325, x = key_47_cast_fp16)[name = string("op_4326_cast_fp16")];
+            bool mh_w_47_transpose_x_0 = const()[name = string("mh_w_47_transpose_x_0"), val = bool(true)];
+            bool mh_w_47_transpose_y_0 = const()[name = string("mh_w_47_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, 1500, 1500]> mh_w_47_cast_fp16 = matmul(transpose_x = mh_w_47_transpose_x_0, transpose_y = mh_w_47_transpose_y_0, x = var_4324_cast_fp16, y = var_4326_cast_fp16)[name = string("mh_w_47_cast_fp16")];
+            tensor<fp16, [1, 20, 1500, 1500]> var_4329_cast_fp16 = softmax(axis = var_4240, x = mh_w_47_cast_fp16)[name = string("op_4329_cast_fp16")];
+            tensor<int32, [4]> var_4330 = const()[name = string("op_4330"), val = tensor<int32, [4]>([1, 20, 64, -1])];
+            tensor<fp16, [1, 20, 64, 1500]> var_4331_cast_fp16 = reshape(shape = var_4330, x = value_47_cast_fp16)[name = string("op_4331_cast_fp16")];
+            bool attn_47_transpose_x_0 = const()[name = string("attn_47_transpose_x_0"), val = bool(false)];
+            bool attn_47_transpose_y_0 = const()[name = string("attn_47_transpose_y_0"), val = bool(true)];
+            tensor<fp16, [1, 20, 64, 1500]> attn_47_cast_fp16 = matmul(transpose_x = attn_47_transpose_x_0, transpose_y = attn_47_transpose_y_0, x = var_4331_cast_fp16, y = var_4329_cast_fp16)[name = string("attn_47_cast_fp16")];
+            tensor<int32, [4]> var_4334 = const()[name = string("op_4334"), val = tensor<int32, [4]>([1, 1280, 1, -1])];
+            tensor<fp16, [1, 1280, 1, 1500]> input_185_cast_fp16 = reshape(shape = var_4334, x = attn_47_cast_fp16)[name = string("input_185_cast_fp16")];
+            string var_4344_pad_type_0 = const()[name = string("op_4344_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_4344_strides_0 = const()[name = string("op_4344_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_4344_pad_0 = const()[name = string("op_4344_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_4344_dilations_0 = const()[name = string("op_4344_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_4344_groups_0 = const()[name = string("op_4344_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_23_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(312502016))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(313321280))))[name = string("layers_23_self_attn_o_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1280]> layers_23_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_23_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(313321408)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_4344_cast_fp16 = conv(bias = layers_23_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_4344_dilations_0, groups = var_4344_groups_0, pad = var_4344_pad_0, pad_type = var_4344_pad_type_0, strides = var_4344_strides_0, weight = layers_23_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_185_cast_fp16)[name = string("op_4344_cast_fp16")];
+            string var_4350_pad_type_0 = const()[name = string("op_4350_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_4350_strides_0 = const()[name = string("op_4350_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_4350_pad_0 = const()[name = string("op_4350_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_4350_dilations_0 = const()[name = string("op_4350_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_4350_groups_0 = const()[name = string("op_4350_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_23_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(313337280))), nonzero_data = tensor<fp16, [6574]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(313324032))))[name = string("layers_23_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_4350_cast_fp16 = conv(dilations = var_4350_dilations_0, groups = var_4350_groups_0, pad = var_4350_pad_0, pad_type = var_4350_pad_type_0, strides = var_4350_strides_0, weight = layers_23_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_185_cast_fp16)[name = string("op_4350_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> obj_95_cast_fp16 = add(x = var_4344_cast_fp16, y = var_4350_cast_fp16)[name = string("obj_95_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_95_cast_fp16 = add(x = inputs_93_cast_fp16, y = obj_95_cast_fp16)[name = string("inputs_95_cast_fp16")];
+            tensor<int32, [1]> out_95_axes_0 = const()[name = string("out_95_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_4361_to_fp16 = const()[name = string("op_4361_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> out_95_cast_fp16 = layer_norm(axes = out_95_axes_0, epsilon = var_4361_to_fp16, x = inputs_95_cast_fp16)[name = string("out_95_cast_fp16")];
+            tensor<fp16, [1280]> input_187_gamma_0_to_fp16 = const()[name = string("input_187_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(313542144)))];
+            tensor<fp16, [1280]> input_187_beta_0_to_fp16 = const()[name = string("input_187_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(313544768)))];
+            fp16 input_187_epsilon_0_to_fp16 = const()[name = string("input_187_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_187_cast_fp16 = batch_norm(beta = input_187_beta_0_to_fp16, epsilon = input_187_epsilon_0_to_fp16, gamma = input_187_gamma_0_to_fp16, mean = var_105_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_95_cast_fp16)[name = string("input_187_cast_fp16")];
+            string var_4379_pad_type_0 = const()[name = string("op_4379_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_4379_strides_0 = const()[name = string("op_4379_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_4379_pad_0 = const()[name = string("op_4379_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_4379_dilations_0 = const()[name = string("op_4379_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_4379_groups_0 = const()[name = string("op_4379_groups_0"), val = int32(1)];
+            tensor<fp16, [5120, 1280, 1, 1]> layers_23_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [5120, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(313547392))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(316824256))))[name = string("layers_23_fc1_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [5120]> layers_23_fc1_inlier_module_bias_to_fp16 = const()[name = string("layers_23_fc1_inlier_module_bias_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(316824384)))];
+            tensor<fp16, [1, 5120, 1, 1500]> var_4379_cast_fp16 = conv(bias = layers_23_fc1_inlier_module_bias_to_fp16, dilations = var_4379_dilations_0, groups = var_4379_groups_0, pad = var_4379_pad_0, pad_type = var_4379_pad_type_0, strides = var_4379_strides_0, weight = layers_23_fc1_inlier_module_weight_to_fp16_palettized, x = input_187_cast_fp16)[name = string("op_4379_cast_fp16")];
+            string var_4385_pad_type_0 = const()[name = string("op_4385_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_4385_strides_0 = const()[name = string("op_4385_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_4385_pad_0 = const()[name = string("op_4385_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_4385_dilations_0 = const()[name = string("op_4385_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_4385_groups_0 = const()[name = string("op_4385_groups_0"), val = int32(1)];
+            tensor<fp16, [5120, 1280, 1, 1]> layers_23_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [5120, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(316896960))), nonzero_data = tensor<fp16, [31092]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(316834688))))[name = string("layers_23_fc1_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 5120, 1, 1500]> var_4385_cast_fp16 = conv(dilations = var_4385_dilations_0, groups = var_4385_groups_0, pad = var_4385_pad_0, pad_type = var_4385_pad_type_0, strides = var_4385_strides_0, weight = layers_23_fc1_outlier_module_weight_to_fp16_sparsified, x = input_187_cast_fp16)[name = string("op_4385_cast_fp16")];
+            tensor<fp16, [1, 5120, 1, 1500]> input_189_cast_fp16 = add(x = var_4379_cast_fp16, y = var_4385_cast_fp16)[name = string("input_189_cast_fp16")];
+            string input_191_mode_0 = const()[name = string("input_191_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 5120, 1, 1500]> input_191_cast_fp16 = gelu(mode = input_191_mode_0, x = input_189_cast_fp16)[name = string("input_191_cast_fp16")];
+            string var_4396_pad_type_0 = const()[name = string("op_4396_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_4396_strides_0 = const()[name = string("op_4396_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_4396_pad_0 = const()[name = string("op_4396_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_4396_dilations_0 = const()[name = string("op_4396_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_4396_groups_0 = const()[name = string("op_4396_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 5120, 1, 1]> layers_23_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 5120, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(317716224))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(320993088))))[name = string("layers_23_fc2_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1280]> layers_23_fc2_inlier_module_bias_to_fp16 = const()[name = string("layers_23_fc2_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(320993216)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_4396_cast_fp16 = conv(bias = layers_23_fc2_inlier_module_bias_to_fp16, dilations = var_4396_dilations_0, groups = var_4396_groups_0, pad = var_4396_pad_0, pad_type = var_4396_pad_type_0, strides = var_4396_strides_0, weight = layers_23_fc2_inlier_module_weight_to_fp16_palettized, x = input_191_cast_fp16)[name = string("op_4396_cast_fp16")];
+            string var_4402_pad_type_0 = const()[name = string("op_4402_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_4402_strides_0 = const()[name = string("op_4402_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_4402_pad_0 = const()[name = string("op_4402_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_4402_dilations_0 = const()[name = string("op_4402_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_4402_groups_0 = const()[name = string("op_4402_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 5120, 1, 1]> layers_23_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 5120, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(321052864))), nonzero_data = tensor<fp16, [28479]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(320995840))))[name = string("layers_23_fc2_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_4402_cast_fp16 = conv(dilations = var_4402_dilations_0, groups = var_4402_groups_0, pad = var_4402_pad_0, pad_type = var_4402_pad_type_0, strides = var_4402_strides_0, weight = layers_23_fc2_outlier_module_weight_to_fp16_sparsified, x = input_191_cast_fp16)[name = string("op_4402_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> hidden_states_51_cast_fp16 = add(x = var_4396_cast_fp16, y = var_4402_cast_fp16)[name = string("hidden_states_51_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_97_cast_fp16 = add(x = inputs_95_cast_fp16, y = hidden_states_51_cast_fp16)[name = string("inputs_97_cast_fp16")];
+            int32 var_4412 = const()[name = string("op_4412"), val = int32(3)];
+            tensor<int32, [1]> out_97_axes_0 = const()[name = string("out_97_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_4431_to_fp16 = const()[name = string("op_4431_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> out_97_cast_fp16 = layer_norm(axes = out_97_axes_0, epsilon = var_4431_to_fp16, x = inputs_97_cast_fp16)[name = string("out_97_cast_fp16")];
+            tensor<fp16, [1280]> obj_97_gamma_0_to_fp16 = const()[name = string("obj_97_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(321872128)))];
+            tensor<fp16, [1280]> obj_97_beta_0_to_fp16 = const()[name = string("obj_97_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(321874752)))];
+            fp16 obj_97_epsilon_0_to_fp16 = const()[name = string("obj_97_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> obj_97_cast_fp16 = batch_norm(beta = obj_97_beta_0_to_fp16, epsilon = obj_97_epsilon_0_to_fp16, gamma = obj_97_gamma_0_to_fp16, mean = var_105_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_97_cast_fp16)[name = string("obj_97_cast_fp16")];
+            string var_4453_pad_type_0 = const()[name = string("op_4453_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_4453_strides_0 = const()[name = string("op_4453_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_4453_pad_0 = const()[name = string("op_4453_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_4453_dilations_0 = const()[name = string("op_4453_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_4453_groups_0 = const()[name = string("op_4453_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_24_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(321877376))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(322696640))))[name = string("layers_24_self_attn_q_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1280]> layers_24_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_24_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(322696768)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_4453_cast_fp16 = conv(bias = layers_24_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_4453_dilations_0, groups = var_4453_groups_0, pad = var_4453_pad_0, pad_type = var_4453_pad_type_0, strides = var_4453_strides_0, weight = layers_24_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_97_cast_fp16)[name = string("op_4453_cast_fp16")];
+            string var_4459_pad_type_0 = const()[name = string("op_4459_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_4459_strides_0 = const()[name = string("op_4459_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_4459_pad_0 = const()[name = string("op_4459_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_4459_dilations_0 = const()[name = string("op_4459_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_4459_groups_0 = const()[name = string("op_4459_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_24_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(322733056))), nonzero_data = tensor<fp16, [16797]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(322699392))))[name = string("layers_24_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_4459_cast_fp16 = conv(dilations = var_4459_dilations_0, groups = var_4459_groups_0, pad = var_4459_pad_0, pad_type = var_4459_pad_type_0, strides = var_4459_strides_0, weight = layers_24_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_97_cast_fp16)[name = string("op_4459_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> query_49_cast_fp16 = add(x = var_4453_cast_fp16, y = var_4459_cast_fp16)[name = string("query_49_cast_fp16")];
+            string var_4468_pad_type_0 = const()[name = string("op_4468_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_4468_strides_0 = const()[name = string("op_4468_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_4468_pad_0 = const()[name = string("op_4468_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_4468_dilations_0 = const()[name = string("op_4468_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_4468_groups_0 = const()[name = string("op_4468_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_24_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(322937920))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(323757184))))[name = string("layers_24_self_attn_k_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_4468_cast_fp16 = conv(dilations = var_4468_dilations_0, groups = var_4468_groups_0, pad = var_4468_pad_0, pad_type = var_4468_pad_type_0, strides = var_4468_strides_0, weight = layers_24_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_97_cast_fp16)[name = string("op_4468_cast_fp16")];
+            string var_4474_pad_type_0 = const()[name = string("op_4474_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_4474_strides_0 = const()[name = string("op_4474_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_4474_pad_0 = const()[name = string("op_4474_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_4474_dilations_0 = const()[name = string("op_4474_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_4474_groups_0 = const()[name = string("op_4474_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_24_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(323783744))), nonzero_data = tensor<fp16, [13170]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(323757312))))[name = string("layers_24_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_4474_cast_fp16 = conv(dilations = var_4474_dilations_0, groups = var_4474_groups_0, pad = var_4474_pad_0, pad_type = var_4474_pad_type_0, strides = var_4474_strides_0, weight = layers_24_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_97_cast_fp16)[name = string("op_4474_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> key_49_cast_fp16 = add(x = var_4468_cast_fp16, y = var_4474_cast_fp16)[name = string("key_49_cast_fp16")];
+            string var_4484_pad_type_0 = const()[name = string("op_4484_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_4484_strides_0 = const()[name = string("op_4484_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_4484_pad_0 = const()[name = string("op_4484_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_4484_dilations_0 = const()[name = string("op_4484_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_4484_groups_0 = const()[name = string("op_4484_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_24_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(323988608))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(324807872))))[name = string("layers_24_self_attn_v_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1280]> layers_24_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_24_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(324808000)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_4484_cast_fp16 = conv(bias = layers_24_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_4484_dilations_0, groups = var_4484_groups_0, pad = var_4484_pad_0, pad_type = var_4484_pad_type_0, strides = var_4484_strides_0, weight = layers_24_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_97_cast_fp16)[name = string("op_4484_cast_fp16")];
+            string var_4490_pad_type_0 = const()[name = string("op_4490_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_4490_strides_0 = const()[name = string("op_4490_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_4490_pad_0 = const()[name = string("op_4490_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_4490_dilations_0 = const()[name = string("op_4490_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_4490_groups_0 = const()[name = string("op_4490_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_24_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(324822080))), nonzero_data = tensor<fp16, [5691]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(324810624))))[name = string("layers_24_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_4490_cast_fp16 = conv(dilations = var_4490_dilations_0, groups = var_4490_groups_0, pad = var_4490_pad_0, pad_type = var_4490_pad_type_0, strides = var_4490_strides_0, weight = layers_24_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_97_cast_fp16)[name = string("op_4490_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> value_49_cast_fp16 = add(x = var_4484_cast_fp16, y = var_4490_cast_fp16)[name = string("value_49_cast_fp16")];
+            tensor<int32, [4]> var_4493 = const()[name = string("op_4493"), val = tensor<int32, [4]>([1, 20, 64, -1])];
+            tensor<fp16, [1, 20, 64, 1500]> mh_q_49_cast_fp16 = reshape(shape = var_4493, x = query_49_cast_fp16)[name = string("mh_q_49_cast_fp16")];
+            fp16 var_4495_to_fp16 = const()[name = string("op_4495_to_fp16"), val = fp16(0x1p-3)];
+            tensor<fp16, [1, 20, 64, 1500]> var_4496_cast_fp16 = mul(x = mh_q_49_cast_fp16, y = var_4495_to_fp16)[name = string("op_4496_cast_fp16")];
+            tensor<int32, [4]> var_4497 = const()[name = string("op_4497"), val = tensor<int32, [4]>([1, 20, 64, -1])];
+            tensor<fp16, [1, 20, 64, 1500]> var_4498_cast_fp16 = reshape(shape = var_4497, x = key_49_cast_fp16)[name = string("op_4498_cast_fp16")];
+            bool mh_w_49_transpose_x_0 = const()[name = string("mh_w_49_transpose_x_0"), val = bool(true)];
+            bool mh_w_49_transpose_y_0 = const()[name = string("mh_w_49_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, 1500, 1500]> mh_w_49_cast_fp16 = matmul(transpose_x = mh_w_49_transpose_x_0, transpose_y = mh_w_49_transpose_y_0, x = var_4496_cast_fp16, y = var_4498_cast_fp16)[name = string("mh_w_49_cast_fp16")];
+            tensor<fp16, [1, 20, 1500, 1500]> var_4501_cast_fp16 = softmax(axis = var_4412, x = mh_w_49_cast_fp16)[name = string("op_4501_cast_fp16")];
+            tensor<int32, [4]> var_4502 = const()[name = string("op_4502"), val = tensor<int32, [4]>([1, 20, 64, -1])];
+            tensor<fp16, [1, 20, 64, 1500]> var_4503_cast_fp16 = reshape(shape = var_4502, x = value_49_cast_fp16)[name = string("op_4503_cast_fp16")];
+            bool attn_49_transpose_x_0 = const()[name = string("attn_49_transpose_x_0"), val = bool(false)];
+            bool attn_49_transpose_y_0 = const()[name = string("attn_49_transpose_y_0"), val = bool(true)];
+            tensor<fp16, [1, 20, 64, 1500]> attn_49_cast_fp16 = matmul(transpose_x = attn_49_transpose_x_0, transpose_y = attn_49_transpose_y_0, x = var_4503_cast_fp16, y = var_4501_cast_fp16)[name = string("attn_49_cast_fp16")];
+            tensor<int32, [4]> var_4506 = const()[name = string("op_4506"), val = tensor<int32, [4]>([1, 1280, 1, -1])];
+            tensor<fp16, [1, 1280, 1, 1500]> input_193_cast_fp16 = reshape(shape = var_4506, x = attn_49_cast_fp16)[name = string("input_193_cast_fp16")];
+            string var_4516_pad_type_0 = const()[name = string("op_4516_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_4516_strides_0 = const()[name = string("op_4516_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_4516_pad_0 = const()[name = string("op_4516_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_4516_dilations_0 = const()[name = string("op_4516_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_4516_groups_0 = const()[name = string("op_4516_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_24_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(325026944))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(325846208))))[name = string("layers_24_self_attn_o_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1280]> layers_24_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_24_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(325846336)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_4516_cast_fp16 = conv(bias = layers_24_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_4516_dilations_0, groups = var_4516_groups_0, pad = var_4516_pad_0, pad_type = var_4516_pad_type_0, strides = var_4516_strides_0, weight = layers_24_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_193_cast_fp16)[name = string("op_4516_cast_fp16")];
+            string var_4522_pad_type_0 = const()[name = string("op_4522_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_4522_strides_0 = const()[name = string("op_4522_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_4522_pad_0 = const()[name = string("op_4522_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_4522_dilations_0 = const()[name = string("op_4522_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_4522_groups_0 = const()[name = string("op_4522_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_24_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(325859712))), nonzero_data = tensor<fp16, [5325]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(325848960))))[name = string("layers_24_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_4522_cast_fp16 = conv(dilations = var_4522_dilations_0, groups = var_4522_groups_0, pad = var_4522_pad_0, pad_type = var_4522_pad_type_0, strides = var_4522_strides_0, weight = layers_24_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_193_cast_fp16)[name = string("op_4522_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> obj_99_cast_fp16 = add(x = var_4516_cast_fp16, y = var_4522_cast_fp16)[name = string("obj_99_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_99_cast_fp16 = add(x = inputs_97_cast_fp16, y = obj_99_cast_fp16)[name = string("inputs_99_cast_fp16")];
+            tensor<int32, [1]> out_99_axes_0 = const()[name = string("out_99_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_4533_to_fp16 = const()[name = string("op_4533_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> out_99_cast_fp16 = layer_norm(axes = out_99_axes_0, epsilon = var_4533_to_fp16, x = inputs_99_cast_fp16)[name = string("out_99_cast_fp16")];
+            tensor<fp16, [1280]> input_195_gamma_0_to_fp16 = const()[name = string("input_195_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(326064576)))];
+            tensor<fp16, [1280]> input_195_beta_0_to_fp16 = const()[name = string("input_195_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(326067200)))];
+            fp16 input_195_epsilon_0_to_fp16 = const()[name = string("input_195_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_195_cast_fp16 = batch_norm(beta = input_195_beta_0_to_fp16, epsilon = input_195_epsilon_0_to_fp16, gamma = input_195_gamma_0_to_fp16, mean = var_105_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_99_cast_fp16)[name = string("input_195_cast_fp16")];
+            string var_4551_pad_type_0 = const()[name = string("op_4551_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_4551_strides_0 = const()[name = string("op_4551_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_4551_pad_0 = const()[name = string("op_4551_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_4551_dilations_0 = const()[name = string("op_4551_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_4551_groups_0 = const()[name = string("op_4551_groups_0"), val = int32(1)];
+            tensor<fp16, [5120, 1280, 1, 1]> layers_24_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [5120, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(326069824))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(329346688))))[name = string("layers_24_fc1_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [5120]> layers_24_fc1_inlier_module_bias_to_fp16 = const()[name = string("layers_24_fc1_inlier_module_bias_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(329346816)))];
+            tensor<fp16, [1, 5120, 1, 1500]> var_4551_cast_fp16 = conv(bias = layers_24_fc1_inlier_module_bias_to_fp16, dilations = var_4551_dilations_0, groups = var_4551_groups_0, pad = var_4551_pad_0, pad_type = var_4551_pad_type_0, strides = var_4551_strides_0, weight = layers_24_fc1_inlier_module_weight_to_fp16_palettized, x = input_195_cast_fp16)[name = string("op_4551_cast_fp16")];
+            string var_4557_pad_type_0 = const()[name = string("op_4557_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_4557_strides_0 = const()[name = string("op_4557_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_4557_pad_0 = const()[name = string("op_4557_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_4557_dilations_0 = const()[name = string("op_4557_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_4557_groups_0 = const()[name = string("op_4557_groups_0"), val = int32(1)];
+            tensor<fp16, [5120, 1280, 1, 1]> layers_24_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [5120, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(329417984))), nonzero_data = tensor<fp16, [30384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(329357120))))[name = string("layers_24_fc1_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 5120, 1, 1500]> var_4557_cast_fp16 = conv(dilations = var_4557_dilations_0, groups = var_4557_groups_0, pad = var_4557_pad_0, pad_type = var_4557_pad_type_0, strides = var_4557_strides_0, weight = layers_24_fc1_outlier_module_weight_to_fp16_sparsified, x = input_195_cast_fp16)[name = string("op_4557_cast_fp16")];
+            tensor<fp16, [1, 5120, 1, 1500]> input_197_cast_fp16 = add(x = var_4551_cast_fp16, y = var_4557_cast_fp16)[name = string("input_197_cast_fp16")];
+            string input_199_mode_0 = const()[name = string("input_199_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 5120, 1, 1500]> input_199_cast_fp16 = gelu(mode = input_199_mode_0, x = input_197_cast_fp16)[name = string("input_199_cast_fp16")];
+            string var_4568_pad_type_0 = const()[name = string("op_4568_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_4568_strides_0 = const()[name = string("op_4568_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_4568_pad_0 = const()[name = string("op_4568_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_4568_dilations_0 = const()[name = string("op_4568_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_4568_groups_0 = const()[name = string("op_4568_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 5120, 1, 1]> layers_24_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 5120, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(330237248))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(333514112))))[name = string("layers_24_fc2_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1280]> layers_24_fc2_inlier_module_bias_to_fp16 = const()[name = string("layers_24_fc2_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(333514240)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_4568_cast_fp16 = conv(bias = layers_24_fc2_inlier_module_bias_to_fp16, dilations = var_4568_dilations_0, groups = var_4568_groups_0, pad = var_4568_pad_0, pad_type = var_4568_pad_type_0, strides = var_4568_strides_0, weight = layers_24_fc2_inlier_module_weight_to_fp16_palettized, x = input_199_cast_fp16)[name = string("op_4568_cast_fp16")];
+            string var_4574_pad_type_0 = const()[name = string("op_4574_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_4574_strides_0 = const()[name = string("op_4574_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_4574_pad_0 = const()[name = string("op_4574_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_4574_dilations_0 = const()[name = string("op_4574_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_4574_groups_0 = const()[name = string("op_4574_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 5120, 1, 1]> layers_24_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 5120, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(333569216))), nonzero_data = tensor<fp16, [26123]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(333516864))))[name = string("layers_24_fc2_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_4574_cast_fp16 = conv(dilations = var_4574_dilations_0, groups = var_4574_groups_0, pad = var_4574_pad_0, pad_type = var_4574_pad_type_0, strides = var_4574_strides_0, weight = layers_24_fc2_outlier_module_weight_to_fp16_sparsified, x = input_199_cast_fp16)[name = string("op_4574_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> hidden_states_53_cast_fp16 = add(x = var_4568_cast_fp16, y = var_4574_cast_fp16)[name = string("hidden_states_53_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_101_cast_fp16 = add(x = inputs_99_cast_fp16, y = hidden_states_53_cast_fp16)[name = string("inputs_101_cast_fp16")];
+            int32 var_4584 = const()[name = string("op_4584"), val = int32(3)];
+            tensor<int32, [1]> out_101_axes_0 = const()[name = string("out_101_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_4603_to_fp16 = const()[name = string("op_4603_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> out_101_cast_fp16 = layer_norm(axes = out_101_axes_0, epsilon = var_4603_to_fp16, x = inputs_101_cast_fp16)[name = string("out_101_cast_fp16")];
+            tensor<fp16, [1280]> obj_101_gamma_0_to_fp16 = const()[name = string("obj_101_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(334388480)))];
+            tensor<fp16, [1280]> obj_101_beta_0_to_fp16 = const()[name = string("obj_101_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(334391104)))];
+            fp16 obj_101_epsilon_0_to_fp16 = const()[name = string("obj_101_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> obj_101_cast_fp16 = batch_norm(beta = obj_101_beta_0_to_fp16, epsilon = obj_101_epsilon_0_to_fp16, gamma = obj_101_gamma_0_to_fp16, mean = var_105_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_101_cast_fp16)[name = string("obj_101_cast_fp16")];
+            string var_4625_pad_type_0 = const()[name = string("op_4625_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_4625_strides_0 = const()[name = string("op_4625_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_4625_pad_0 = const()[name = string("op_4625_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_4625_dilations_0 = const()[name = string("op_4625_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_4625_groups_0 = const()[name = string("op_4625_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_25_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(334393728))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(335212992))))[name = string("layers_25_self_attn_q_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1280]> layers_25_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_25_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(335213120)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_4625_cast_fp16 = conv(bias = layers_25_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_4625_dilations_0, groups = var_4625_groups_0, pad = var_4625_pad_0, pad_type = var_4625_pad_type_0, strides = var_4625_strides_0, weight = layers_25_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_101_cast_fp16)[name = string("op_4625_cast_fp16")];
+            string var_4631_pad_type_0 = const()[name = string("op_4631_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_4631_strides_0 = const()[name = string("op_4631_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_4631_pad_0 = const()[name = string("op_4631_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_4631_dilations_0 = const()[name = string("op_4631_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_4631_groups_0 = const()[name = string("op_4631_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_25_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(335253376))), nonzero_data = tensor<fp16, [18769]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(335215744))))[name = string("layers_25_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_4631_cast_fp16 = conv(dilations = var_4631_dilations_0, groups = var_4631_groups_0, pad = var_4631_pad_0, pad_type = var_4631_pad_type_0, strides = var_4631_strides_0, weight = layers_25_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_101_cast_fp16)[name = string("op_4631_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> query_51_cast_fp16 = add(x = var_4625_cast_fp16, y = var_4631_cast_fp16)[name = string("query_51_cast_fp16")];
+            string var_4640_pad_type_0 = const()[name = string("op_4640_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_4640_strides_0 = const()[name = string("op_4640_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_4640_pad_0 = const()[name = string("op_4640_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_4640_dilations_0 = const()[name = string("op_4640_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_4640_groups_0 = const()[name = string("op_4640_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_25_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(335458240))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(336277504))))[name = string("layers_25_self_attn_k_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_4640_cast_fp16 = conv(dilations = var_4640_dilations_0, groups = var_4640_groups_0, pad = var_4640_pad_0, pad_type = var_4640_pad_type_0, strides = var_4640_strides_0, weight = layers_25_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_101_cast_fp16)[name = string("op_4640_cast_fp16")];
+            string var_4646_pad_type_0 = const()[name = string("op_4646_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_4646_strides_0 = const()[name = string("op_4646_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_4646_pad_0 = const()[name = string("op_4646_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_4646_dilations_0 = const()[name = string("op_4646_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_4646_groups_0 = const()[name = string("op_4646_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_25_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(336302912))), nonzero_data = tensor<fp16, [12599]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(336277632))))[name = string("layers_25_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_4646_cast_fp16 = conv(dilations = var_4646_dilations_0, groups = var_4646_groups_0, pad = var_4646_pad_0, pad_type = var_4646_pad_type_0, strides = var_4646_strides_0, weight = layers_25_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_101_cast_fp16)[name = string("op_4646_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> key_51_cast_fp16 = add(x = var_4640_cast_fp16, y = var_4646_cast_fp16)[name = string("key_51_cast_fp16")];
+            string var_4656_pad_type_0 = const()[name = string("op_4656_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_4656_strides_0 = const()[name = string("op_4656_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_4656_pad_0 = const()[name = string("op_4656_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_4656_dilations_0 = const()[name = string("op_4656_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_4656_groups_0 = const()[name = string("op_4656_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_25_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(336507776))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(337327040))))[name = string("layers_25_self_attn_v_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1280]> layers_25_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_25_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(337327168)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_4656_cast_fp16 = conv(bias = layers_25_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_4656_dilations_0, groups = var_4656_groups_0, pad = var_4656_pad_0, pad_type = var_4656_pad_type_0, strides = var_4656_strides_0, weight = layers_25_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_101_cast_fp16)[name = string("op_4656_cast_fp16")];
+            string var_4662_pad_type_0 = const()[name = string("op_4662_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_4662_strides_0 = const()[name = string("op_4662_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_4662_pad_0 = const()[name = string("op_4662_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_4662_dilations_0 = const()[name = string("op_4662_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_4662_groups_0 = const()[name = string("op_4662_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_25_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(337341760))), nonzero_data = tensor<fp16, [5951]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(337329792))))[name = string("layers_25_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_4662_cast_fp16 = conv(dilations = var_4662_dilations_0, groups = var_4662_groups_0, pad = var_4662_pad_0, pad_type = var_4662_pad_type_0, strides = var_4662_strides_0, weight = layers_25_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_101_cast_fp16)[name = string("op_4662_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> value_51_cast_fp16 = add(x = var_4656_cast_fp16, y = var_4662_cast_fp16)[name = string("value_51_cast_fp16")];
+            tensor<int32, [4]> var_4665 = const()[name = string("op_4665"), val = tensor<int32, [4]>([1, 20, 64, -1])];
+            tensor<fp16, [1, 20, 64, 1500]> mh_q_51_cast_fp16 = reshape(shape = var_4665, x = query_51_cast_fp16)[name = string("mh_q_51_cast_fp16")];
+            fp16 var_4667_to_fp16 = const()[name = string("op_4667_to_fp16"), val = fp16(0x1p-3)];
+            tensor<fp16, [1, 20, 64, 1500]> var_4668_cast_fp16 = mul(x = mh_q_51_cast_fp16, y = var_4667_to_fp16)[name = string("op_4668_cast_fp16")];
+            tensor<int32, [4]> var_4669 = const()[name = string("op_4669"), val = tensor<int32, [4]>([1, 20, 64, -1])];
+            tensor<fp16, [1, 20, 64, 1500]> var_4670_cast_fp16 = reshape(shape = var_4669, x = key_51_cast_fp16)[name = string("op_4670_cast_fp16")];
+            bool mh_w_51_transpose_x_0 = const()[name = string("mh_w_51_transpose_x_0"), val = bool(true)];
+            bool mh_w_51_transpose_y_0 = const()[name = string("mh_w_51_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, 1500, 1500]> mh_w_51_cast_fp16 = matmul(transpose_x = mh_w_51_transpose_x_0, transpose_y = mh_w_51_transpose_y_0, x = var_4668_cast_fp16, y = var_4670_cast_fp16)[name = string("mh_w_51_cast_fp16")];
+            tensor<fp16, [1, 20, 1500, 1500]> var_4673_cast_fp16 = softmax(axis = var_4584, x = mh_w_51_cast_fp16)[name = string("op_4673_cast_fp16")];
+            tensor<int32, [4]> var_4674 = const()[name = string("op_4674"), val = tensor<int32, [4]>([1, 20, 64, -1])];
+            tensor<fp16, [1, 20, 64, 1500]> var_4675_cast_fp16 = reshape(shape = var_4674, x = value_51_cast_fp16)[name = string("op_4675_cast_fp16")];
+            bool attn_51_transpose_x_0 = const()[name = string("attn_51_transpose_x_0"), val = bool(false)];
+            bool attn_51_transpose_y_0 = const()[name = string("attn_51_transpose_y_0"), val = bool(true)];
+            tensor<fp16, [1, 20, 64, 1500]> attn_51_cast_fp16 = matmul(transpose_x = attn_51_transpose_x_0, transpose_y = attn_51_transpose_y_0, x = var_4675_cast_fp16, y = var_4673_cast_fp16)[name = string("attn_51_cast_fp16")];
+            tensor<int32, [4]> var_4678 = const()[name = string("op_4678"), val = tensor<int32, [4]>([1, 1280, 1, -1])];
+            tensor<fp16, [1, 1280, 1, 1500]> input_201_cast_fp16 = reshape(shape = var_4678, x = attn_51_cast_fp16)[name = string("input_201_cast_fp16")];
+            string var_4688_pad_type_0 = const()[name = string("op_4688_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_4688_strides_0 = const()[name = string("op_4688_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_4688_pad_0 = const()[name = string("op_4688_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_4688_dilations_0 = const()[name = string("op_4688_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_4688_groups_0 = const()[name = string("op_4688_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_25_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(337546624))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(338365888))))[name = string("layers_25_self_attn_o_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1280]> layers_25_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_25_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(338366016)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_4688_cast_fp16 = conv(bias = layers_25_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_4688_dilations_0, groups = var_4688_groups_0, pad = var_4688_pad_0, pad_type = var_4688_pad_type_0, strides = var_4688_strides_0, weight = layers_25_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_201_cast_fp16)[name = string("op_4688_cast_fp16")];
+            string var_4694_pad_type_0 = const()[name = string("op_4694_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_4694_strides_0 = const()[name = string("op_4694_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_4694_pad_0 = const()[name = string("op_4694_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_4694_dilations_0 = const()[name = string("op_4694_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_4694_groups_0 = const()[name = string("op_4694_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_25_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(338379584))), nonzero_data = tensor<fp16, [5419]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(338368640))))[name = string("layers_25_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_4694_cast_fp16 = conv(dilations = var_4694_dilations_0, groups = var_4694_groups_0, pad = var_4694_pad_0, pad_type = var_4694_pad_type_0, strides = var_4694_strides_0, weight = layers_25_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_201_cast_fp16)[name = string("op_4694_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> obj_103_cast_fp16 = add(x = var_4688_cast_fp16, y = var_4694_cast_fp16)[name = string("obj_103_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_103_cast_fp16 = add(x = inputs_101_cast_fp16, y = obj_103_cast_fp16)[name = string("inputs_103_cast_fp16")];
+            tensor<int32, [1]> out_103_axes_0 = const()[name = string("out_103_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_4705_to_fp16 = const()[name = string("op_4705_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> out_103_cast_fp16 = layer_norm(axes = out_103_axes_0, epsilon = var_4705_to_fp16, x = inputs_103_cast_fp16)[name = string("out_103_cast_fp16")];
+            tensor<fp16, [1280]> input_203_gamma_0_to_fp16 = const()[name = string("input_203_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(338584448)))];
+            tensor<fp16, [1280]> input_203_beta_0_to_fp16 = const()[name = string("input_203_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(338587072)))];
+            fp16 input_203_epsilon_0_to_fp16 = const()[name = string("input_203_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_203_cast_fp16 = batch_norm(beta = input_203_beta_0_to_fp16, epsilon = input_203_epsilon_0_to_fp16, gamma = input_203_gamma_0_to_fp16, mean = var_105_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_103_cast_fp16)[name = string("input_203_cast_fp16")];
+            string var_4723_pad_type_0 = const()[name = string("op_4723_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_4723_strides_0 = const()[name = string("op_4723_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_4723_pad_0 = const()[name = string("op_4723_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_4723_dilations_0 = const()[name = string("op_4723_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_4723_groups_0 = const()[name = string("op_4723_groups_0"), val = int32(1)];
+            tensor<fp16, [5120, 1280, 1, 1]> layers_25_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [5120, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(338589696))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(341866560))))[name = string("layers_25_fc1_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [5120]> layers_25_fc1_inlier_module_bias_to_fp16 = const()[name = string("layers_25_fc1_inlier_module_bias_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(341866688)))];
+            tensor<fp16, [1, 5120, 1, 1500]> var_4723_cast_fp16 = conv(bias = layers_25_fc1_inlier_module_bias_to_fp16, dilations = var_4723_dilations_0, groups = var_4723_groups_0, pad = var_4723_pad_0, pad_type = var_4723_pad_type_0, strides = var_4723_strides_0, weight = layers_25_fc1_inlier_module_weight_to_fp16_palettized, x = input_203_cast_fp16)[name = string("op_4723_cast_fp16")];
+            string var_4729_pad_type_0 = const()[name = string("op_4729_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_4729_strides_0 = const()[name = string("op_4729_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_4729_pad_0 = const()[name = string("op_4729_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_4729_dilations_0 = const()[name = string("op_4729_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_4729_groups_0 = const()[name = string("op_4729_groups_0"), val = int32(1)];
+            tensor<fp16, [5120, 1280, 1, 1]> layers_25_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [5120, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(341933760))), nonzero_data = tensor<fp16, [28333]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(341876992))))[name = string("layers_25_fc1_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 5120, 1, 1500]> var_4729_cast_fp16 = conv(dilations = var_4729_dilations_0, groups = var_4729_groups_0, pad = var_4729_pad_0, pad_type = var_4729_pad_type_0, strides = var_4729_strides_0, weight = layers_25_fc1_outlier_module_weight_to_fp16_sparsified, x = input_203_cast_fp16)[name = string("op_4729_cast_fp16")];
+            tensor<fp16, [1, 5120, 1, 1500]> input_205_cast_fp16 = add(x = var_4723_cast_fp16, y = var_4729_cast_fp16)[name = string("input_205_cast_fp16")];
+            string input_207_mode_0 = const()[name = string("input_207_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 5120, 1, 1500]> input_207_cast_fp16 = gelu(mode = input_207_mode_0, x = input_205_cast_fp16)[name = string("input_207_cast_fp16")];
+            string var_4740_pad_type_0 = const()[name = string("op_4740_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_4740_strides_0 = const()[name = string("op_4740_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_4740_pad_0 = const()[name = string("op_4740_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_4740_dilations_0 = const()[name = string("op_4740_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_4740_groups_0 = const()[name = string("op_4740_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 5120, 1, 1]> layers_25_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 5120, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(342753024))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(346029888))))[name = string("layers_25_fc2_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1280]> layers_25_fc2_inlier_module_bias_to_fp16 = const()[name = string("layers_25_fc2_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(346030016)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_4740_cast_fp16 = conv(bias = layers_25_fc2_inlier_module_bias_to_fp16, dilations = var_4740_dilations_0, groups = var_4740_groups_0, pad = var_4740_pad_0, pad_type = var_4740_pad_type_0, strides = var_4740_strides_0, weight = layers_25_fc2_inlier_module_weight_to_fp16_palettized, x = input_207_cast_fp16)[name = string("op_4740_cast_fp16")];
+            string var_4746_pad_type_0 = const()[name = string("op_4746_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_4746_strides_0 = const()[name = string("op_4746_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_4746_pad_0 = const()[name = string("op_4746_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_4746_dilations_0 = const()[name = string("op_4746_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_4746_groups_0 = const()[name = string("op_4746_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 5120, 1, 1]> layers_25_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 5120, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(346081152))), nonzero_data = tensor<fp16, [24223]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(346032640))))[name = string("layers_25_fc2_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_4746_cast_fp16 = conv(dilations = var_4746_dilations_0, groups = var_4746_groups_0, pad = var_4746_pad_0, pad_type = var_4746_pad_type_0, strides = var_4746_strides_0, weight = layers_25_fc2_outlier_module_weight_to_fp16_sparsified, x = input_207_cast_fp16)[name = string("op_4746_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> hidden_states_55_cast_fp16 = add(x = var_4740_cast_fp16, y = var_4746_cast_fp16)[name = string("hidden_states_55_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_105_cast_fp16 = add(x = inputs_103_cast_fp16, y = hidden_states_55_cast_fp16)[name = string("inputs_105_cast_fp16")];
+            int32 var_4756 = const()[name = string("op_4756"), val = int32(3)];
+            tensor<int32, [1]> out_105_axes_0 = const()[name = string("out_105_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_4775_to_fp16 = const()[name = string("op_4775_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> out_105_cast_fp16 = layer_norm(axes = out_105_axes_0, epsilon = var_4775_to_fp16, x = inputs_105_cast_fp16)[name = string("out_105_cast_fp16")];
+            tensor<fp16, [1280]> obj_105_gamma_0_to_fp16 = const()[name = string("obj_105_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(346900416)))];
+            tensor<fp16, [1280]> obj_105_beta_0_to_fp16 = const()[name = string("obj_105_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(346903040)))];
+            fp16 obj_105_epsilon_0_to_fp16 = const()[name = string("obj_105_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> obj_105_cast_fp16 = batch_norm(beta = obj_105_beta_0_to_fp16, epsilon = obj_105_epsilon_0_to_fp16, gamma = obj_105_gamma_0_to_fp16, mean = var_105_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_105_cast_fp16)[name = string("obj_105_cast_fp16")];
+            string var_4797_pad_type_0 = const()[name = string("op_4797_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_4797_strides_0 = const()[name = string("op_4797_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_4797_pad_0 = const()[name = string("op_4797_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_4797_dilations_0 = const()[name = string("op_4797_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_4797_groups_0 = const()[name = string("op_4797_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_26_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(346905664))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(347724928))))[name = string("layers_26_self_attn_q_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1280]> layers_26_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_26_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(347725056)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_4797_cast_fp16 = conv(bias = layers_26_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_4797_dilations_0, groups = var_4797_groups_0, pad = var_4797_pad_0, pad_type = var_4797_pad_type_0, strides = var_4797_strides_0, weight = layers_26_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_105_cast_fp16)[name = string("op_4797_cast_fp16")];
+            string var_4803_pad_type_0 = const()[name = string("op_4803_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_4803_strides_0 = const()[name = string("op_4803_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_4803_pad_0 = const()[name = string("op_4803_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_4803_dilations_0 = const()[name = string("op_4803_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_4803_groups_0 = const()[name = string("op_4803_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_26_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(347761152))), nonzero_data = tensor<fp16, [16685]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(347727680))))[name = string("layers_26_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_4803_cast_fp16 = conv(dilations = var_4803_dilations_0, groups = var_4803_groups_0, pad = var_4803_pad_0, pad_type = var_4803_pad_type_0, strides = var_4803_strides_0, weight = layers_26_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_105_cast_fp16)[name = string("op_4803_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> query_53_cast_fp16 = add(x = var_4797_cast_fp16, y = var_4803_cast_fp16)[name = string("query_53_cast_fp16")];
+            string var_4812_pad_type_0 = const()[name = string("op_4812_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_4812_strides_0 = const()[name = string("op_4812_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_4812_pad_0 = const()[name = string("op_4812_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_4812_dilations_0 = const()[name = string("op_4812_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_4812_groups_0 = const()[name = string("op_4812_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_26_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(347966016))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(348785280))))[name = string("layers_26_self_attn_k_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_4812_cast_fp16 = conv(dilations = var_4812_dilations_0, groups = var_4812_groups_0, pad = var_4812_pad_0, pad_type = var_4812_pad_type_0, strides = var_4812_strides_0, weight = layers_26_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_105_cast_fp16)[name = string("op_4812_cast_fp16")];
+            string var_4818_pad_type_0 = const()[name = string("op_4818_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_4818_strides_0 = const()[name = string("op_4818_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_4818_pad_0 = const()[name = string("op_4818_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_4818_dilations_0 = const()[name = string("op_4818_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_4818_groups_0 = const()[name = string("op_4818_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_26_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(348811904))), nonzero_data = tensor<fp16, [13198]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(348785408))))[name = string("layers_26_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_4818_cast_fp16 = conv(dilations = var_4818_dilations_0, groups = var_4818_groups_0, pad = var_4818_pad_0, pad_type = var_4818_pad_type_0, strides = var_4818_strides_0, weight = layers_26_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_105_cast_fp16)[name = string("op_4818_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> key_53_cast_fp16 = add(x = var_4812_cast_fp16, y = var_4818_cast_fp16)[name = string("key_53_cast_fp16")];
+            string var_4828_pad_type_0 = const()[name = string("op_4828_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_4828_strides_0 = const()[name = string("op_4828_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_4828_pad_0 = const()[name = string("op_4828_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_4828_dilations_0 = const()[name = string("op_4828_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_4828_groups_0 = const()[name = string("op_4828_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_26_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(349016768))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(349836032))))[name = string("layers_26_self_attn_v_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1280]> layers_26_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_26_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(349836160)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_4828_cast_fp16 = conv(bias = layers_26_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_4828_dilations_0, groups = var_4828_groups_0, pad = var_4828_pad_0, pad_type = var_4828_pad_type_0, strides = var_4828_strides_0, weight = layers_26_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_105_cast_fp16)[name = string("op_4828_cast_fp16")];
+            string var_4834_pad_type_0 = const()[name = string("op_4834_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_4834_strides_0 = const()[name = string("op_4834_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_4834_pad_0 = const()[name = string("op_4834_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_4834_dilations_0 = const()[name = string("op_4834_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_4834_groups_0 = const()[name = string("op_4834_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_26_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(349851264))), nonzero_data = tensor<fp16, [6184]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(349838784))))[name = string("layers_26_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_4834_cast_fp16 = conv(dilations = var_4834_dilations_0, groups = var_4834_groups_0, pad = var_4834_pad_0, pad_type = var_4834_pad_type_0, strides = var_4834_strides_0, weight = layers_26_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_105_cast_fp16)[name = string("op_4834_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> value_53_cast_fp16 = add(x = var_4828_cast_fp16, y = var_4834_cast_fp16)[name = string("value_53_cast_fp16")];
+            tensor<int32, [4]> var_4837 = const()[name = string("op_4837"), val = tensor<int32, [4]>([1, 20, 64, -1])];
+            tensor<fp16, [1, 20, 64, 1500]> mh_q_53_cast_fp16 = reshape(shape = var_4837, x = query_53_cast_fp16)[name = string("mh_q_53_cast_fp16")];
+            fp16 var_4839_to_fp16 = const()[name = string("op_4839_to_fp16"), val = fp16(0x1p-3)];
+            tensor<fp16, [1, 20, 64, 1500]> var_4840_cast_fp16 = mul(x = mh_q_53_cast_fp16, y = var_4839_to_fp16)[name = string("op_4840_cast_fp16")];
+            tensor<int32, [4]> var_4841 = const()[name = string("op_4841"), val = tensor<int32, [4]>([1, 20, 64, -1])];
+            tensor<fp16, [1, 20, 64, 1500]> var_4842_cast_fp16 = reshape(shape = var_4841, x = key_53_cast_fp16)[name = string("op_4842_cast_fp16")];
+            bool mh_w_53_transpose_x_0 = const()[name = string("mh_w_53_transpose_x_0"), val = bool(true)];
+            bool mh_w_53_transpose_y_0 = const()[name = string("mh_w_53_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, 1500, 1500]> mh_w_53_cast_fp16 = matmul(transpose_x = mh_w_53_transpose_x_0, transpose_y = mh_w_53_transpose_y_0, x = var_4840_cast_fp16, y = var_4842_cast_fp16)[name = string("mh_w_53_cast_fp16")];
+            tensor<fp16, [1, 20, 1500, 1500]> var_4845_cast_fp16 = softmax(axis = var_4756, x = mh_w_53_cast_fp16)[name = string("op_4845_cast_fp16")];
+            tensor<int32, [4]> var_4846 = const()[name = string("op_4846"), val = tensor<int32, [4]>([1, 20, 64, -1])];
+            tensor<fp16, [1, 20, 64, 1500]> var_4847_cast_fp16 = reshape(shape = var_4846, x = value_53_cast_fp16)[name = string("op_4847_cast_fp16")];
+            bool attn_53_transpose_x_0 = const()[name = string("attn_53_transpose_x_0"), val = bool(false)];
+            bool attn_53_transpose_y_0 = const()[name = string("attn_53_transpose_y_0"), val = bool(true)];
+            tensor<fp16, [1, 20, 64, 1500]> attn_53_cast_fp16 = matmul(transpose_x = attn_53_transpose_x_0, transpose_y = attn_53_transpose_y_0, x = var_4847_cast_fp16, y = var_4845_cast_fp16)[name = string("attn_53_cast_fp16")];
+            tensor<int32, [4]> var_4850 = const()[name = string("op_4850"), val = tensor<int32, [4]>([1, 1280, 1, -1])];
+            tensor<fp16, [1, 1280, 1, 1500]> input_209_cast_fp16 = reshape(shape = var_4850, x = attn_53_cast_fp16)[name = string("input_209_cast_fp16")];
+            string var_4860_pad_type_0 = const()[name = string("op_4860_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_4860_strides_0 = const()[name = string("op_4860_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_4860_pad_0 = const()[name = string("op_4860_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_4860_dilations_0 = const()[name = string("op_4860_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_4860_groups_0 = const()[name = string("op_4860_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_26_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(350056128))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(350875392))))[name = string("layers_26_self_attn_o_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1280]> layers_26_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_26_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(350875520)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_4860_cast_fp16 = conv(bias = layers_26_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_4860_dilations_0, groups = var_4860_groups_0, pad = var_4860_pad_0, pad_type = var_4860_pad_type_0, strides = var_4860_strides_0, weight = layers_26_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_209_cast_fp16)[name = string("op_4860_cast_fp16")];
+            string var_4866_pad_type_0 = const()[name = string("op_4866_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_4866_strides_0 = const()[name = string("op_4866_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_4866_pad_0 = const()[name = string("op_4866_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_4866_dilations_0 = const()[name = string("op_4866_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_4866_groups_0 = const()[name = string("op_4866_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_26_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(350889984))), nonzero_data = tensor<fp16, [5866]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(350878144))))[name = string("layers_26_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_4866_cast_fp16 = conv(dilations = var_4866_dilations_0, groups = var_4866_groups_0, pad = var_4866_pad_0, pad_type = var_4866_pad_type_0, strides = var_4866_strides_0, weight = layers_26_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_209_cast_fp16)[name = string("op_4866_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> obj_107_cast_fp16 = add(x = var_4860_cast_fp16, y = var_4866_cast_fp16)[name = string("obj_107_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_107_cast_fp16 = add(x = inputs_105_cast_fp16, y = obj_107_cast_fp16)[name = string("inputs_107_cast_fp16")];
+            tensor<int32, [1]> out_107_axes_0 = const()[name = string("out_107_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_4877_to_fp16 = const()[name = string("op_4877_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> out_107_cast_fp16 = layer_norm(axes = out_107_axes_0, epsilon = var_4877_to_fp16, x = inputs_107_cast_fp16)[name = string("out_107_cast_fp16")];
+            tensor<fp16, [1280]> input_211_gamma_0_to_fp16 = const()[name = string("input_211_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(351094848)))];
+            tensor<fp16, [1280]> input_211_beta_0_to_fp16 = const()[name = string("input_211_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(351097472)))];
+            fp16 input_211_epsilon_0_to_fp16 = const()[name = string("input_211_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_211_cast_fp16 = batch_norm(beta = input_211_beta_0_to_fp16, epsilon = input_211_epsilon_0_to_fp16, gamma = input_211_gamma_0_to_fp16, mean = var_105_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_107_cast_fp16)[name = string("input_211_cast_fp16")];
+            string var_4895_pad_type_0 = const()[name = string("op_4895_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_4895_strides_0 = const()[name = string("op_4895_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_4895_pad_0 = const()[name = string("op_4895_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_4895_dilations_0 = const()[name = string("op_4895_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_4895_groups_0 = const()[name = string("op_4895_groups_0"), val = int32(1)];
+            tensor<fp16, [5120, 1280, 1, 1]> layers_26_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [5120, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(351100096))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(354376960))))[name = string("layers_26_fc1_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [5120]> layers_26_fc1_inlier_module_bias_to_fp16 = const()[name = string("layers_26_fc1_inlier_module_bias_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(354377088)))];
+            tensor<fp16, [1, 5120, 1, 1500]> var_4895_cast_fp16 = conv(bias = layers_26_fc1_inlier_module_bias_to_fp16, dilations = var_4895_dilations_0, groups = var_4895_groups_0, pad = var_4895_pad_0, pad_type = var_4895_pad_type_0, strides = var_4895_strides_0, weight = layers_26_fc1_inlier_module_weight_to_fp16_palettized, x = input_211_cast_fp16)[name = string("op_4895_cast_fp16")];
+            string var_4901_pad_type_0 = const()[name = string("op_4901_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_4901_strides_0 = const()[name = string("op_4901_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_4901_pad_0 = const()[name = string("op_4901_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_4901_dilations_0 = const()[name = string("op_4901_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_4901_groups_0 = const()[name = string("op_4901_groups_0"), val = int32(1)];
+            tensor<fp16, [5120, 1280, 1, 1]> layers_26_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [5120, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(354440128))), nonzero_data = tensor<fp16, [26314]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(354387392))))[name = string("layers_26_fc1_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 5120, 1, 1500]> var_4901_cast_fp16 = conv(dilations = var_4901_dilations_0, groups = var_4901_groups_0, pad = var_4901_pad_0, pad_type = var_4901_pad_type_0, strides = var_4901_strides_0, weight = layers_26_fc1_outlier_module_weight_to_fp16_sparsified, x = input_211_cast_fp16)[name = string("op_4901_cast_fp16")];
+            tensor<fp16, [1, 5120, 1, 1500]> input_213_cast_fp16 = add(x = var_4895_cast_fp16, y = var_4901_cast_fp16)[name = string("input_213_cast_fp16")];
+            string input_215_mode_0 = const()[name = string("input_215_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 5120, 1, 1500]> input_215_cast_fp16 = gelu(mode = input_215_mode_0, x = input_213_cast_fp16)[name = string("input_215_cast_fp16")];
+            string var_4912_pad_type_0 = const()[name = string("op_4912_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_4912_strides_0 = const()[name = string("op_4912_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_4912_pad_0 = const()[name = string("op_4912_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_4912_dilations_0 = const()[name = string("op_4912_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_4912_groups_0 = const()[name = string("op_4912_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 5120, 1, 1]> layers_26_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 5120, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(355259392))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(358536256))))[name = string("layers_26_fc2_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1280]> layers_26_fc2_inlier_module_bias_to_fp16 = const()[name = string("layers_26_fc2_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(358536384)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_4912_cast_fp16 = conv(bias = layers_26_fc2_inlier_module_bias_to_fp16, dilations = var_4912_dilations_0, groups = var_4912_groups_0, pad = var_4912_pad_0, pad_type = var_4912_pad_type_0, strides = var_4912_strides_0, weight = layers_26_fc2_inlier_module_weight_to_fp16_palettized, x = input_215_cast_fp16)[name = string("op_4912_cast_fp16")];
+            string var_4918_pad_type_0 = const()[name = string("op_4918_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_4918_strides_0 = const()[name = string("op_4918_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_4918_pad_0 = const()[name = string("op_4918_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_4918_dilations_0 = const()[name = string("op_4918_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_4918_groups_0 = const()[name = string("op_4918_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 5120, 1, 1]> layers_26_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 5120, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(358588352))), nonzero_data = tensor<fp16, [24636]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(358539008))))[name = string("layers_26_fc2_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_4918_cast_fp16 = conv(dilations = var_4918_dilations_0, groups = var_4918_groups_0, pad = var_4918_pad_0, pad_type = var_4918_pad_type_0, strides = var_4918_strides_0, weight = layers_26_fc2_outlier_module_weight_to_fp16_sparsified, x = input_215_cast_fp16)[name = string("op_4918_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> hidden_states_57_cast_fp16 = add(x = var_4912_cast_fp16, y = var_4918_cast_fp16)[name = string("hidden_states_57_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_109_cast_fp16 = add(x = inputs_107_cast_fp16, y = hidden_states_57_cast_fp16)[name = string("inputs_109_cast_fp16")];
+            int32 var_4928 = const()[name = string("op_4928"), val = int32(3)];
+            tensor<int32, [1]> out_109_axes_0 = const()[name = string("out_109_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_4947_to_fp16 = const()[name = string("op_4947_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> out_109_cast_fp16 = layer_norm(axes = out_109_axes_0, epsilon = var_4947_to_fp16, x = inputs_109_cast_fp16)[name = string("out_109_cast_fp16")];
+            tensor<fp16, [1280]> obj_109_gamma_0_to_fp16 = const()[name = string("obj_109_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(359407616)))];
+            tensor<fp16, [1280]> obj_109_beta_0_to_fp16 = const()[name = string("obj_109_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(359410240)))];
+            fp16 obj_109_epsilon_0_to_fp16 = const()[name = string("obj_109_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> obj_109_cast_fp16 = batch_norm(beta = obj_109_beta_0_to_fp16, epsilon = obj_109_epsilon_0_to_fp16, gamma = obj_109_gamma_0_to_fp16, mean = var_105_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_109_cast_fp16)[name = string("obj_109_cast_fp16")];
+            string var_4969_pad_type_0 = const()[name = string("op_4969_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_4969_strides_0 = const()[name = string("op_4969_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_4969_pad_0 = const()[name = string("op_4969_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_4969_dilations_0 = const()[name = string("op_4969_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_4969_groups_0 = const()[name = string("op_4969_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_27_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(359412864))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(360232128))))[name = string("layers_27_self_attn_q_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1280]> layers_27_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_27_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(360232256)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_4969_cast_fp16 = conv(bias = layers_27_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_4969_dilations_0, groups = var_4969_groups_0, pad = var_4969_pad_0, pad_type = var_4969_pad_type_0, strides = var_4969_strides_0, weight = layers_27_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_109_cast_fp16)[name = string("op_4969_cast_fp16")];
+            string var_4975_pad_type_0 = const()[name = string("op_4975_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_4975_strides_0 = const()[name = string("op_4975_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_4975_pad_0 = const()[name = string("op_4975_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_4975_dilations_0 = const()[name = string("op_4975_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_4975_groups_0 = const()[name = string("op_4975_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_27_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(360271616))), nonzero_data = tensor<fp16, [18336]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(360234880))))[name = string("layers_27_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_4975_cast_fp16 = conv(dilations = var_4975_dilations_0, groups = var_4975_groups_0, pad = var_4975_pad_0, pad_type = var_4975_pad_type_0, strides = var_4975_strides_0, weight = layers_27_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_109_cast_fp16)[name = string("op_4975_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> query_55_cast_fp16 = add(x = var_4969_cast_fp16, y = var_4975_cast_fp16)[name = string("query_55_cast_fp16")];
+            string var_4984_pad_type_0 = const()[name = string("op_4984_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_4984_strides_0 = const()[name = string("op_4984_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_4984_pad_0 = const()[name = string("op_4984_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_4984_dilations_0 = const()[name = string("op_4984_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_4984_groups_0 = const()[name = string("op_4984_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_27_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(360476480))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(361295744))))[name = string("layers_27_self_attn_k_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_4984_cast_fp16 = conv(dilations = var_4984_dilations_0, groups = var_4984_groups_0, pad = var_4984_pad_0, pad_type = var_4984_pad_type_0, strides = var_4984_strides_0, weight = layers_27_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_109_cast_fp16)[name = string("op_4984_cast_fp16")];
+            string var_4990_pad_type_0 = const()[name = string("op_4990_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_4990_strides_0 = const()[name = string("op_4990_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_4990_pad_0 = const()[name = string("op_4990_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_4990_dilations_0 = const()[name = string("op_4990_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_4990_groups_0 = const()[name = string("op_4990_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_27_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(361321984))), nonzero_data = tensor<fp16, [12996]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(361295872))))[name = string("layers_27_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_4990_cast_fp16 = conv(dilations = var_4990_dilations_0, groups = var_4990_groups_0, pad = var_4990_pad_0, pad_type = var_4990_pad_type_0, strides = var_4990_strides_0, weight = layers_27_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_109_cast_fp16)[name = string("op_4990_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> key_55_cast_fp16 = add(x = var_4984_cast_fp16, y = var_4990_cast_fp16)[name = string("key_55_cast_fp16")];
+            string var_5000_pad_type_0 = const()[name = string("op_5000_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_5000_strides_0 = const()[name = string("op_5000_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_5000_pad_0 = const()[name = string("op_5000_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_5000_dilations_0 = const()[name = string("op_5000_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_5000_groups_0 = const()[name = string("op_5000_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_27_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(361526848))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(362346112))))[name = string("layers_27_self_attn_v_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1280]> layers_27_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_27_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(362346240)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_5000_cast_fp16 = conv(bias = layers_27_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_5000_dilations_0, groups = var_5000_groups_0, pad = var_5000_pad_0, pad_type = var_5000_pad_type_0, strides = var_5000_strides_0, weight = layers_27_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_109_cast_fp16)[name = string("op_5000_cast_fp16")];
+            string var_5006_pad_type_0 = const()[name = string("op_5006_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_5006_strides_0 = const()[name = string("op_5006_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_5006_pad_0 = const()[name = string("op_5006_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_5006_dilations_0 = const()[name = string("op_5006_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_5006_groups_0 = const()[name = string("op_5006_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_27_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(362359552))), nonzero_data = tensor<fp16, [5306]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(362348864))))[name = string("layers_27_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_5006_cast_fp16 = conv(dilations = var_5006_dilations_0, groups = var_5006_groups_0, pad = var_5006_pad_0, pad_type = var_5006_pad_type_0, strides = var_5006_strides_0, weight = layers_27_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_109_cast_fp16)[name = string("op_5006_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> value_55_cast_fp16 = add(x = var_5000_cast_fp16, y = var_5006_cast_fp16)[name = string("value_55_cast_fp16")];
+            tensor<int32, [4]> var_5009 = const()[name = string("op_5009"), val = tensor<int32, [4]>([1, 20, 64, -1])];
+            tensor<fp16, [1, 20, 64, 1500]> mh_q_55_cast_fp16 = reshape(shape = var_5009, x = query_55_cast_fp16)[name = string("mh_q_55_cast_fp16")];
+            fp16 var_5011_to_fp16 = const()[name = string("op_5011_to_fp16"), val = fp16(0x1p-3)];
+            tensor<fp16, [1, 20, 64, 1500]> var_5012_cast_fp16 = mul(x = mh_q_55_cast_fp16, y = var_5011_to_fp16)[name = string("op_5012_cast_fp16")];
+            tensor<int32, [4]> var_5013 = const()[name = string("op_5013"), val = tensor<int32, [4]>([1, 20, 64, -1])];
+            tensor<fp16, [1, 20, 64, 1500]> var_5014_cast_fp16 = reshape(shape = var_5013, x = key_55_cast_fp16)[name = string("op_5014_cast_fp16")];
+            bool mh_w_55_transpose_x_0 = const()[name = string("mh_w_55_transpose_x_0"), val = bool(true)];
+            bool mh_w_55_transpose_y_0 = const()[name = string("mh_w_55_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, 1500, 1500]> mh_w_55_cast_fp16 = matmul(transpose_x = mh_w_55_transpose_x_0, transpose_y = mh_w_55_transpose_y_0, x = var_5012_cast_fp16, y = var_5014_cast_fp16)[name = string("mh_w_55_cast_fp16")];
+            tensor<fp16, [1, 20, 1500, 1500]> var_5017_cast_fp16 = softmax(axis = var_4928, x = mh_w_55_cast_fp16)[name = string("op_5017_cast_fp16")];
+            tensor<int32, [4]> var_5018 = const()[name = string("op_5018"), val = tensor<int32, [4]>([1, 20, 64, -1])];
+            tensor<fp16, [1, 20, 64, 1500]> var_5019_cast_fp16 = reshape(shape = var_5018, x = value_55_cast_fp16)[name = string("op_5019_cast_fp16")];
+            bool attn_55_transpose_x_0 = const()[name = string("attn_55_transpose_x_0"), val = bool(false)];
+            bool attn_55_transpose_y_0 = const()[name = string("attn_55_transpose_y_0"), val = bool(true)];
+            tensor<fp16, [1, 20, 64, 1500]> attn_55_cast_fp16 = matmul(transpose_x = attn_55_transpose_x_0, transpose_y = attn_55_transpose_y_0, x = var_5019_cast_fp16, y = var_5017_cast_fp16)[name = string("attn_55_cast_fp16")];
+            tensor<int32, [4]> var_5022 = const()[name = string("op_5022"), val = tensor<int32, [4]>([1, 1280, 1, -1])];
+            tensor<fp16, [1, 1280, 1, 1500]> input_217_cast_fp16 = reshape(shape = var_5022, x = attn_55_cast_fp16)[name = string("input_217_cast_fp16")];
+            string var_5032_pad_type_0 = const()[name = string("op_5032_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_5032_strides_0 = const()[name = string("op_5032_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_5032_pad_0 = const()[name = string("op_5032_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_5032_dilations_0 = const()[name = string("op_5032_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_5032_groups_0 = const()[name = string("op_5032_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_27_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(362564416))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(363383680))))[name = string("layers_27_self_attn_o_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1280]> layers_27_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_27_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(363383808)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_5032_cast_fp16 = conv(bias = layers_27_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_5032_dilations_0, groups = var_5032_groups_0, pad = var_5032_pad_0, pad_type = var_5032_pad_type_0, strides = var_5032_strides_0, weight = layers_27_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_217_cast_fp16)[name = string("op_5032_cast_fp16")];
+            string var_5038_pad_type_0 = const()[name = string("op_5038_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_5038_strides_0 = const()[name = string("op_5038_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_5038_pad_0 = const()[name = string("op_5038_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_5038_dilations_0 = const()[name = string("op_5038_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_5038_groups_0 = const()[name = string("op_5038_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_27_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(363396544))), nonzero_data = tensor<fp16, [4994]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(363386432))))[name = string("layers_27_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_5038_cast_fp16 = conv(dilations = var_5038_dilations_0, groups = var_5038_groups_0, pad = var_5038_pad_0, pad_type = var_5038_pad_type_0, strides = var_5038_strides_0, weight = layers_27_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_217_cast_fp16)[name = string("op_5038_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> obj_111_cast_fp16 = add(x = var_5032_cast_fp16, y = var_5038_cast_fp16)[name = string("obj_111_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_111_cast_fp16 = add(x = inputs_109_cast_fp16, y = obj_111_cast_fp16)[name = string("inputs_111_cast_fp16")];
+            tensor<int32, [1]> out_111_axes_0 = const()[name = string("out_111_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_5049_to_fp16 = const()[name = string("op_5049_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> out_111_cast_fp16 = layer_norm(axes = out_111_axes_0, epsilon = var_5049_to_fp16, x = inputs_111_cast_fp16)[name = string("out_111_cast_fp16")];
+            tensor<fp16, [1280]> input_219_gamma_0_to_fp16 = const()[name = string("input_219_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(363601408)))];
+            tensor<fp16, [1280]> input_219_beta_0_to_fp16 = const()[name = string("input_219_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(363604032)))];
+            fp16 input_219_epsilon_0_to_fp16 = const()[name = string("input_219_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_219_cast_fp16 = batch_norm(beta = input_219_beta_0_to_fp16, epsilon = input_219_epsilon_0_to_fp16, gamma = input_219_gamma_0_to_fp16, mean = var_105_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_111_cast_fp16)[name = string("input_219_cast_fp16")];
+            string var_5067_pad_type_0 = const()[name = string("op_5067_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_5067_strides_0 = const()[name = string("op_5067_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_5067_pad_0 = const()[name = string("op_5067_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_5067_dilations_0 = const()[name = string("op_5067_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_5067_groups_0 = const()[name = string("op_5067_groups_0"), val = int32(1)];
+            tensor<fp16, [5120, 1280, 1, 1]> layers_27_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [5120, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(363606656))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(366883520))))[name = string("layers_27_fc1_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [5120]> layers_27_fc1_inlier_module_bias_to_fp16 = const()[name = string("layers_27_fc1_inlier_module_bias_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(366883648)))];
+            tensor<fp16, [1, 5120, 1, 1500]> var_5067_cast_fp16 = conv(bias = layers_27_fc1_inlier_module_bias_to_fp16, dilations = var_5067_dilations_0, groups = var_5067_groups_0, pad = var_5067_pad_0, pad_type = var_5067_pad_type_0, strides = var_5067_strides_0, weight = layers_27_fc1_inlier_module_weight_to_fp16_palettized, x = input_219_cast_fp16)[name = string("op_5067_cast_fp16")];
+            string var_5073_pad_type_0 = const()[name = string("op_5073_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_5073_strides_0 = const()[name = string("op_5073_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_5073_pad_0 = const()[name = string("op_5073_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_5073_dilations_0 = const()[name = string("op_5073_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_5073_groups_0 = const()[name = string("op_5073_groups_0"), val = int32(1)];
+            tensor<fp16, [5120, 1280, 1, 1]> layers_27_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [5120, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(366948672))), nonzero_data = tensor<fp16, [27310]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(366893952))))[name = string("layers_27_fc1_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 5120, 1, 1500]> var_5073_cast_fp16 = conv(dilations = var_5073_dilations_0, groups = var_5073_groups_0, pad = var_5073_pad_0, pad_type = var_5073_pad_type_0, strides = var_5073_strides_0, weight = layers_27_fc1_outlier_module_weight_to_fp16_sparsified, x = input_219_cast_fp16)[name = string("op_5073_cast_fp16")];
+            tensor<fp16, [1, 5120, 1, 1500]> input_221_cast_fp16 = add(x = var_5067_cast_fp16, y = var_5073_cast_fp16)[name = string("input_221_cast_fp16")];
+            string input_223_mode_0 = const()[name = string("input_223_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 5120, 1, 1500]> input_223_cast_fp16 = gelu(mode = input_223_mode_0, x = input_221_cast_fp16)[name = string("input_223_cast_fp16")];
+            string var_5084_pad_type_0 = const()[name = string("op_5084_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_5084_strides_0 = const()[name = string("op_5084_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_5084_pad_0 = const()[name = string("op_5084_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_5084_dilations_0 = const()[name = string("op_5084_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_5084_groups_0 = const()[name = string("op_5084_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 5120, 1, 1]> layers_27_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 5120, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(367767936))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(371044800))))[name = string("layers_27_fc2_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1280]> layers_27_fc2_inlier_module_bias_to_fp16 = const()[name = string("layers_27_fc2_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(371044928)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_5084_cast_fp16 = conv(bias = layers_27_fc2_inlier_module_bias_to_fp16, dilations = var_5084_dilations_0, groups = var_5084_groups_0, pad = var_5084_pad_0, pad_type = var_5084_pad_type_0, strides = var_5084_strides_0, weight = layers_27_fc2_inlier_module_weight_to_fp16_palettized, x = input_223_cast_fp16)[name = string("op_5084_cast_fp16")];
+            string var_5090_pad_type_0 = const()[name = string("op_5090_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_5090_strides_0 = const()[name = string("op_5090_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_5090_pad_0 = const()[name = string("op_5090_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_5090_dilations_0 = const()[name = string("op_5090_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_5090_groups_0 = const()[name = string("op_5090_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 5120, 1, 1]> layers_27_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 5120, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(371098048))), nonzero_data = tensor<fp16, [25203]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(371047552))))[name = string("layers_27_fc2_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_5090_cast_fp16 = conv(dilations = var_5090_dilations_0, groups = var_5090_groups_0, pad = var_5090_pad_0, pad_type = var_5090_pad_type_0, strides = var_5090_strides_0, weight = layers_27_fc2_outlier_module_weight_to_fp16_sparsified, x = input_223_cast_fp16)[name = string("op_5090_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> hidden_states_59_cast_fp16 = add(x = var_5084_cast_fp16, y = var_5090_cast_fp16)[name = string("hidden_states_59_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_113_cast_fp16 = add(x = inputs_111_cast_fp16, y = hidden_states_59_cast_fp16)[name = string("inputs_113_cast_fp16")];
+            int32 var_5100 = const()[name = string("op_5100"), val = int32(3)];
+            tensor<int32, [1]> out_113_axes_0 = const()[name = string("out_113_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_5119_to_fp16 = const()[name = string("op_5119_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> out_113_cast_fp16 = layer_norm(axes = out_113_axes_0, epsilon = var_5119_to_fp16, x = inputs_113_cast_fp16)[name = string("out_113_cast_fp16")];
+            tensor<fp16, [1280]> obj_113_gamma_0_to_fp16 = const()[name = string("obj_113_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(371917312)))];
+            tensor<fp16, [1280]> obj_113_beta_0_to_fp16 = const()[name = string("obj_113_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(371919936)))];
+            fp16 obj_113_epsilon_0_to_fp16 = const()[name = string("obj_113_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> obj_113_cast_fp16 = batch_norm(beta = obj_113_beta_0_to_fp16, epsilon = obj_113_epsilon_0_to_fp16, gamma = obj_113_gamma_0_to_fp16, mean = var_105_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_113_cast_fp16)[name = string("obj_113_cast_fp16")];
+            string var_5141_pad_type_0 = const()[name = string("op_5141_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_5141_strides_0 = const()[name = string("op_5141_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_5141_pad_0 = const()[name = string("op_5141_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_5141_dilations_0 = const()[name = string("op_5141_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_5141_groups_0 = const()[name = string("op_5141_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_28_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(371922560))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(372741824))))[name = string("layers_28_self_attn_q_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1280]> layers_28_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_28_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(372741952)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_5141_cast_fp16 = conv(bias = layers_28_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_5141_dilations_0, groups = var_5141_groups_0, pad = var_5141_pad_0, pad_type = var_5141_pad_type_0, strides = var_5141_strides_0, weight = layers_28_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_113_cast_fp16)[name = string("op_5141_cast_fp16")];
+            string var_5147_pad_type_0 = const()[name = string("op_5147_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_5147_strides_0 = const()[name = string("op_5147_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_5147_pad_0 = const()[name = string("op_5147_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_5147_dilations_0 = const()[name = string("op_5147_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_5147_groups_0 = const()[name = string("op_5147_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_28_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(372772992))), nonzero_data = tensor<fp16, [14146]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(372744576))))[name = string("layers_28_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_5147_cast_fp16 = conv(dilations = var_5147_dilations_0, groups = var_5147_groups_0, pad = var_5147_pad_0, pad_type = var_5147_pad_type_0, strides = var_5147_strides_0, weight = layers_28_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_113_cast_fp16)[name = string("op_5147_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> query_57_cast_fp16 = add(x = var_5141_cast_fp16, y = var_5147_cast_fp16)[name = string("query_57_cast_fp16")];
+            string var_5156_pad_type_0 = const()[name = string("op_5156_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_5156_strides_0 = const()[name = string("op_5156_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_5156_pad_0 = const()[name = string("op_5156_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_5156_dilations_0 = const()[name = string("op_5156_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_5156_groups_0 = const()[name = string("op_5156_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_28_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(372977856))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(373797120))))[name = string("layers_28_self_attn_k_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_5156_cast_fp16 = conv(dilations = var_5156_dilations_0, groups = var_5156_groups_0, pad = var_5156_pad_0, pad_type = var_5156_pad_type_0, strides = var_5156_strides_0, weight = layers_28_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_113_cast_fp16)[name = string("op_5156_cast_fp16")];
+            string var_5162_pad_type_0 = const()[name = string("op_5162_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_5162_strides_0 = const()[name = string("op_5162_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_5162_pad_0 = const()[name = string("op_5162_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_5162_dilations_0 = const()[name = string("op_5162_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_5162_groups_0 = const()[name = string("op_5162_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_28_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(373819328))), nonzero_data = tensor<fp16, [10981]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(373797248))))[name = string("layers_28_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_5162_cast_fp16 = conv(dilations = var_5162_dilations_0, groups = var_5162_groups_0, pad = var_5162_pad_0, pad_type = var_5162_pad_type_0, strides = var_5162_strides_0, weight = layers_28_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_113_cast_fp16)[name = string("op_5162_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> key_57_cast_fp16 = add(x = var_5156_cast_fp16, y = var_5162_cast_fp16)[name = string("key_57_cast_fp16")];
+            string var_5172_pad_type_0 = const()[name = string("op_5172_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_5172_strides_0 = const()[name = string("op_5172_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_5172_pad_0 = const()[name = string("op_5172_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_5172_dilations_0 = const()[name = string("op_5172_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_5172_groups_0 = const()[name = string("op_5172_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_28_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(374024192))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(374843456))))[name = string("layers_28_self_attn_v_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1280]> layers_28_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_28_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(374843584)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_5172_cast_fp16 = conv(bias = layers_28_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_5172_dilations_0, groups = var_5172_groups_0, pad = var_5172_pad_0, pad_type = var_5172_pad_type_0, strides = var_5172_strides_0, weight = layers_28_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_113_cast_fp16)[name = string("op_5172_cast_fp16")];
+            string var_5178_pad_type_0 = const()[name = string("op_5178_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_5178_strides_0 = const()[name = string("op_5178_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_5178_pad_0 = const()[name = string("op_5178_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_5178_dilations_0 = const()[name = string("op_5178_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_5178_groups_0 = const()[name = string("op_5178_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_28_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(374858048))), nonzero_data = tensor<fp16, [5866]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(374846208))))[name = string("layers_28_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_5178_cast_fp16 = conv(dilations = var_5178_dilations_0, groups = var_5178_groups_0, pad = var_5178_pad_0, pad_type = var_5178_pad_type_0, strides = var_5178_strides_0, weight = layers_28_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_113_cast_fp16)[name = string("op_5178_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> value_57_cast_fp16 = add(x = var_5172_cast_fp16, y = var_5178_cast_fp16)[name = string("value_57_cast_fp16")];
+            tensor<int32, [4]> var_5181 = const()[name = string("op_5181"), val = tensor<int32, [4]>([1, 20, 64, -1])];
+            tensor<fp16, [1, 20, 64, 1500]> mh_q_57_cast_fp16 = reshape(shape = var_5181, x = query_57_cast_fp16)[name = string("mh_q_57_cast_fp16")];
+            fp16 var_5183_to_fp16 = const()[name = string("op_5183_to_fp16"), val = fp16(0x1p-3)];
+            tensor<fp16, [1, 20, 64, 1500]> var_5184_cast_fp16 = mul(x = mh_q_57_cast_fp16, y = var_5183_to_fp16)[name = string("op_5184_cast_fp16")];
+            tensor<int32, [4]> var_5185 = const()[name = string("op_5185"), val = tensor<int32, [4]>([1, 20, 64, -1])];
+            tensor<fp16, [1, 20, 64, 1500]> var_5186_cast_fp16 = reshape(shape = var_5185, x = key_57_cast_fp16)[name = string("op_5186_cast_fp16")];
+            bool mh_w_57_transpose_x_0 = const()[name = string("mh_w_57_transpose_x_0"), val = bool(true)];
+            bool mh_w_57_transpose_y_0 = const()[name = string("mh_w_57_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, 1500, 1500]> mh_w_57_cast_fp16 = matmul(transpose_x = mh_w_57_transpose_x_0, transpose_y = mh_w_57_transpose_y_0, x = var_5184_cast_fp16, y = var_5186_cast_fp16)[name = string("mh_w_57_cast_fp16")];
+            tensor<fp16, [1, 20, 1500, 1500]> var_5189_cast_fp16 = softmax(axis = var_5100, x = mh_w_57_cast_fp16)[name = string("op_5189_cast_fp16")];
+            tensor<int32, [4]> var_5190 = const()[name = string("op_5190"), val = tensor<int32, [4]>([1, 20, 64, -1])];
+            tensor<fp16, [1, 20, 64, 1500]> var_5191_cast_fp16 = reshape(shape = var_5190, x = value_57_cast_fp16)[name = string("op_5191_cast_fp16")];
+            bool attn_57_transpose_x_0 = const()[name = string("attn_57_transpose_x_0"), val = bool(false)];
+            bool attn_57_transpose_y_0 = const()[name = string("attn_57_transpose_y_0"), val = bool(true)];
+            tensor<fp16, [1, 20, 64, 1500]> attn_57_cast_fp16 = matmul(transpose_x = attn_57_transpose_x_0, transpose_y = attn_57_transpose_y_0, x = var_5191_cast_fp16, y = var_5189_cast_fp16)[name = string("attn_57_cast_fp16")];
+            tensor<int32, [4]> var_5194 = const()[name = string("op_5194"), val = tensor<int32, [4]>([1, 1280, 1, -1])];
+            tensor<fp16, [1, 1280, 1, 1500]> input_225_cast_fp16 = reshape(shape = var_5194, x = attn_57_cast_fp16)[name = string("input_225_cast_fp16")];
+            string var_5204_pad_type_0 = const()[name = string("op_5204_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_5204_strides_0 = const()[name = string("op_5204_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_5204_pad_0 = const()[name = string("op_5204_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_5204_dilations_0 = const()[name = string("op_5204_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_5204_groups_0 = const()[name = string("op_5204_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_28_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(375062912))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(375882176))))[name = string("layers_28_self_attn_o_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1280]> layers_28_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_28_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(375882304)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_5204_cast_fp16 = conv(bias = layers_28_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_5204_dilations_0, groups = var_5204_groups_0, pad = var_5204_pad_0, pad_type = var_5204_pad_type_0, strides = var_5204_strides_0, weight = layers_28_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_225_cast_fp16)[name = string("op_5204_cast_fp16")];
+            string var_5210_pad_type_0 = const()[name = string("op_5210_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_5210_strides_0 = const()[name = string("op_5210_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_5210_pad_0 = const()[name = string("op_5210_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_5210_dilations_0 = const()[name = string("op_5210_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_5210_groups_0 = const()[name = string("op_5210_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_28_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(375896192))), nonzero_data = tensor<fp16, [5591]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(375884928))))[name = string("layers_28_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_5210_cast_fp16 = conv(dilations = var_5210_dilations_0, groups = var_5210_groups_0, pad = var_5210_pad_0, pad_type = var_5210_pad_type_0, strides = var_5210_strides_0, weight = layers_28_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_225_cast_fp16)[name = string("op_5210_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> obj_115_cast_fp16 = add(x = var_5204_cast_fp16, y = var_5210_cast_fp16)[name = string("obj_115_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_115_cast_fp16 = add(x = inputs_113_cast_fp16, y = obj_115_cast_fp16)[name = string("inputs_115_cast_fp16")];
+            tensor<int32, [1]> out_115_axes_0 = const()[name = string("out_115_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_5221_to_fp16 = const()[name = string("op_5221_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> out_115_cast_fp16 = layer_norm(axes = out_115_axes_0, epsilon = var_5221_to_fp16, x = inputs_115_cast_fp16)[name = string("out_115_cast_fp16")];
+            tensor<fp16, [1280]> input_227_gamma_0_to_fp16 = const()[name = string("input_227_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(376101056)))];
+            tensor<fp16, [1280]> input_227_beta_0_to_fp16 = const()[name = string("input_227_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(376103680)))];
+            fp16 input_227_epsilon_0_to_fp16 = const()[name = string("input_227_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_227_cast_fp16 = batch_norm(beta = input_227_beta_0_to_fp16, epsilon = input_227_epsilon_0_to_fp16, gamma = input_227_gamma_0_to_fp16, mean = var_105_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_115_cast_fp16)[name = string("input_227_cast_fp16")];
+            string var_5239_pad_type_0 = const()[name = string("op_5239_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_5239_strides_0 = const()[name = string("op_5239_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_5239_pad_0 = const()[name = string("op_5239_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_5239_dilations_0 = const()[name = string("op_5239_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_5239_groups_0 = const()[name = string("op_5239_groups_0"), val = int32(1)];
+            tensor<fp16, [5120, 1280, 1, 1]> layers_28_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [5120, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(376106304))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(379383168))))[name = string("layers_28_fc1_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [5120]> layers_28_fc1_inlier_module_bias_to_fp16 = const()[name = string("layers_28_fc1_inlier_module_bias_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(379383296)))];
+            tensor<fp16, [1, 5120, 1, 1500]> var_5239_cast_fp16 = conv(bias = layers_28_fc1_inlier_module_bias_to_fp16, dilations = var_5239_dilations_0, groups = var_5239_groups_0, pad = var_5239_pad_0, pad_type = var_5239_pad_type_0, strides = var_5239_strides_0, weight = layers_28_fc1_inlier_module_weight_to_fp16_palettized, x = input_227_cast_fp16)[name = string("op_5239_cast_fp16")];
+            string var_5245_pad_type_0 = const()[name = string("op_5245_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_5245_strides_0 = const()[name = string("op_5245_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_5245_pad_0 = const()[name = string("op_5245_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_5245_dilations_0 = const()[name = string("op_5245_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_5245_groups_0 = const()[name = string("op_5245_groups_0"), val = int32(1)];
+            tensor<fp16, [5120, 1280, 1, 1]> layers_28_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [5120, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(379450816))), nonzero_data = tensor<fp16, [28562]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(379393600))))[name = string("layers_28_fc1_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 5120, 1, 1500]> var_5245_cast_fp16 = conv(dilations = var_5245_dilations_0, groups = var_5245_groups_0, pad = var_5245_pad_0, pad_type = var_5245_pad_type_0, strides = var_5245_strides_0, weight = layers_28_fc1_outlier_module_weight_to_fp16_sparsified, x = input_227_cast_fp16)[name = string("op_5245_cast_fp16")];
+            tensor<fp16, [1, 5120, 1, 1500]> input_229_cast_fp16 = add(x = var_5239_cast_fp16, y = var_5245_cast_fp16)[name = string("input_229_cast_fp16")];
+            string input_231_mode_0 = const()[name = string("input_231_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 5120, 1, 1500]> input_231_cast_fp16 = gelu(mode = input_231_mode_0, x = input_229_cast_fp16)[name = string("input_231_cast_fp16")];
+            string var_5256_pad_type_0 = const()[name = string("op_5256_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_5256_strides_0 = const()[name = string("op_5256_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_5256_pad_0 = const()[name = string("op_5256_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_5256_dilations_0 = const()[name = string("op_5256_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_5256_groups_0 = const()[name = string("op_5256_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 5120, 1, 1]> layers_28_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 5120, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(380270080))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(383546944))))[name = string("layers_28_fc2_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1280]> layers_28_fc2_inlier_module_bias_to_fp16 = const()[name = string("layers_28_fc2_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(383547072)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_5256_cast_fp16 = conv(bias = layers_28_fc2_inlier_module_bias_to_fp16, dilations = var_5256_dilations_0, groups = var_5256_groups_0, pad = var_5256_pad_0, pad_type = var_5256_pad_type_0, strides = var_5256_strides_0, weight = layers_28_fc2_inlier_module_weight_to_fp16_palettized, x = input_231_cast_fp16)[name = string("op_5256_cast_fp16")];
+            string var_5262_pad_type_0 = const()[name = string("op_5262_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_5262_strides_0 = const()[name = string("op_5262_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_5262_pad_0 = const()[name = string("op_5262_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_5262_dilations_0 = const()[name = string("op_5262_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_5262_groups_0 = const()[name = string("op_5262_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 5120, 1, 1]> layers_28_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 5120, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(383608320))), nonzero_data = tensor<fp16, [29259]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(383549696))))[name = string("layers_28_fc2_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_5262_cast_fp16 = conv(dilations = var_5262_dilations_0, groups = var_5262_groups_0, pad = var_5262_pad_0, pad_type = var_5262_pad_type_0, strides = var_5262_strides_0, weight = layers_28_fc2_outlier_module_weight_to_fp16_sparsified, x = input_231_cast_fp16)[name = string("op_5262_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> hidden_states_61_cast_fp16 = add(x = var_5256_cast_fp16, y = var_5262_cast_fp16)[name = string("hidden_states_61_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_117_cast_fp16 = add(x = inputs_115_cast_fp16, y = hidden_states_61_cast_fp16)[name = string("inputs_117_cast_fp16")];
+            int32 var_5272 = const()[name = string("op_5272"), val = int32(3)];
+            tensor<int32, [1]> out_117_axes_0 = const()[name = string("out_117_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_5291_to_fp16 = const()[name = string("op_5291_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> out_117_cast_fp16 = layer_norm(axes = out_117_axes_0, epsilon = var_5291_to_fp16, x = inputs_117_cast_fp16)[name = string("out_117_cast_fp16")];
+            tensor<fp16, [1280]> obj_117_gamma_0_to_fp16 = const()[name = string("obj_117_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(384427584)))];
+            tensor<fp16, [1280]> obj_117_beta_0_to_fp16 = const()[name = string("obj_117_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(384430208)))];
+            fp16 obj_117_epsilon_0_to_fp16 = const()[name = string("obj_117_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> obj_117_cast_fp16 = batch_norm(beta = obj_117_beta_0_to_fp16, epsilon = obj_117_epsilon_0_to_fp16, gamma = obj_117_gamma_0_to_fp16, mean = var_105_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_117_cast_fp16)[name = string("obj_117_cast_fp16")];
+            string var_5313_pad_type_0 = const()[name = string("op_5313_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_5313_strides_0 = const()[name = string("op_5313_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_5313_pad_0 = const()[name = string("op_5313_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_5313_dilations_0 = const()[name = string("op_5313_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_5313_groups_0 = const()[name = string("op_5313_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_29_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(384432832))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(385252096))))[name = string("layers_29_self_attn_q_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1280]> layers_29_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_29_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(385252224)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_5313_cast_fp16 = conv(bias = layers_29_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_5313_dilations_0, groups = var_5313_groups_0, pad = var_5313_pad_0, pad_type = var_5313_pad_type_0, strides = var_5313_strides_0, weight = layers_29_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_117_cast_fp16)[name = string("op_5313_cast_fp16")];
+            string var_5319_pad_type_0 = const()[name = string("op_5319_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_5319_strides_0 = const()[name = string("op_5319_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_5319_pad_0 = const()[name = string("op_5319_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_5319_dilations_0 = const()[name = string("op_5319_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_5319_groups_0 = const()[name = string("op_5319_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_29_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(385277312))), nonzero_data = tensor<fp16, [11179]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(385254848))))[name = string("layers_29_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_5319_cast_fp16 = conv(dilations = var_5319_dilations_0, groups = var_5319_groups_0, pad = var_5319_pad_0, pad_type = var_5319_pad_type_0, strides = var_5319_strides_0, weight = layers_29_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_117_cast_fp16)[name = string("op_5319_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> query_59_cast_fp16 = add(x = var_5313_cast_fp16, y = var_5319_cast_fp16)[name = string("query_59_cast_fp16")];
+            string var_5328_pad_type_0 = const()[name = string("op_5328_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_5328_strides_0 = const()[name = string("op_5328_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_5328_pad_0 = const()[name = string("op_5328_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_5328_dilations_0 = const()[name = string("op_5328_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_5328_groups_0 = const()[name = string("op_5328_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_29_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(385482176))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(386301440))))[name = string("layers_29_self_attn_k_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_5328_cast_fp16 = conv(dilations = var_5328_dilations_0, groups = var_5328_groups_0, pad = var_5328_pad_0, pad_type = var_5328_pad_type_0, strides = var_5328_strides_0, weight = layers_29_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_117_cast_fp16)[name = string("op_5328_cast_fp16")];
+            string var_5334_pad_type_0 = const()[name = string("op_5334_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_5334_strides_0 = const()[name = string("op_5334_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_5334_pad_0 = const()[name = string("op_5334_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_5334_dilations_0 = const()[name = string("op_5334_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_5334_groups_0 = const()[name = string("op_5334_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_29_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(386323840))), nonzero_data = tensor<fp16, [11076]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(386301568))))[name = string("layers_29_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_5334_cast_fp16 = conv(dilations = var_5334_dilations_0, groups = var_5334_groups_0, pad = var_5334_pad_0, pad_type = var_5334_pad_type_0, strides = var_5334_strides_0, weight = layers_29_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_117_cast_fp16)[name = string("op_5334_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> key_59_cast_fp16 = add(x = var_5328_cast_fp16, y = var_5334_cast_fp16)[name = string("key_59_cast_fp16")];
+            string var_5344_pad_type_0 = const()[name = string("op_5344_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_5344_strides_0 = const()[name = string("op_5344_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_5344_pad_0 = const()[name = string("op_5344_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_5344_dilations_0 = const()[name = string("op_5344_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_5344_groups_0 = const()[name = string("op_5344_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_29_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(386528704))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(387347968))))[name = string("layers_29_self_attn_v_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1280]> layers_29_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_29_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(387348096)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_5344_cast_fp16 = conv(bias = layers_29_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_5344_dilations_0, groups = var_5344_groups_0, pad = var_5344_pad_0, pad_type = var_5344_pad_type_0, strides = var_5344_strides_0, weight = layers_29_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_117_cast_fp16)[name = string("op_5344_cast_fp16")];
+            string var_5350_pad_type_0 = const()[name = string("op_5350_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_5350_strides_0 = const()[name = string("op_5350_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_5350_pad_0 = const()[name = string("op_5350_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_5350_dilations_0 = const()[name = string("op_5350_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_5350_groups_0 = const()[name = string("op_5350_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_29_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(387363392))), nonzero_data = tensor<fp16, [6296]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(387350720))))[name = string("layers_29_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_5350_cast_fp16 = conv(dilations = var_5350_dilations_0, groups = var_5350_groups_0, pad = var_5350_pad_0, pad_type = var_5350_pad_type_0, strides = var_5350_strides_0, weight = layers_29_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_117_cast_fp16)[name = string("op_5350_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> value_59_cast_fp16 = add(x = var_5344_cast_fp16, y = var_5350_cast_fp16)[name = string("value_59_cast_fp16")];
+            tensor<int32, [4]> var_5353 = const()[name = string("op_5353"), val = tensor<int32, [4]>([1, 20, 64, -1])];
+            tensor<fp16, [1, 20, 64, 1500]> mh_q_59_cast_fp16 = reshape(shape = var_5353, x = query_59_cast_fp16)[name = string("mh_q_59_cast_fp16")];
+            fp16 var_5355_to_fp16 = const()[name = string("op_5355_to_fp16"), val = fp16(0x1p-3)];
+            tensor<fp16, [1, 20, 64, 1500]> var_5356_cast_fp16 = mul(x = mh_q_59_cast_fp16, y = var_5355_to_fp16)[name = string("op_5356_cast_fp16")];
+            tensor<int32, [4]> var_5357 = const()[name = string("op_5357"), val = tensor<int32, [4]>([1, 20, 64, -1])];
+            tensor<fp16, [1, 20, 64, 1500]> var_5358_cast_fp16 = reshape(shape = var_5357, x = key_59_cast_fp16)[name = string("op_5358_cast_fp16")];
+            bool mh_w_59_transpose_x_0 = const()[name = string("mh_w_59_transpose_x_0"), val = bool(true)];
+            bool mh_w_59_transpose_y_0 = const()[name = string("mh_w_59_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, 1500, 1500]> mh_w_59_cast_fp16 = matmul(transpose_x = mh_w_59_transpose_x_0, transpose_y = mh_w_59_transpose_y_0, x = var_5356_cast_fp16, y = var_5358_cast_fp16)[name = string("mh_w_59_cast_fp16")];
+            tensor<fp16, [1, 20, 1500, 1500]> var_5361_cast_fp16 = softmax(axis = var_5272, x = mh_w_59_cast_fp16)[name = string("op_5361_cast_fp16")];
+            tensor<int32, [4]> var_5362 = const()[name = string("op_5362"), val = tensor<int32, [4]>([1, 20, 64, -1])];
+            tensor<fp16, [1, 20, 64, 1500]> var_5363_cast_fp16 = reshape(shape = var_5362, x = value_59_cast_fp16)[name = string("op_5363_cast_fp16")];
+            bool attn_59_transpose_x_0 = const()[name = string("attn_59_transpose_x_0"), val = bool(false)];
+            bool attn_59_transpose_y_0 = const()[name = string("attn_59_transpose_y_0"), val = bool(true)];
+            tensor<fp16, [1, 20, 64, 1500]> attn_59_cast_fp16 = matmul(transpose_x = attn_59_transpose_x_0, transpose_y = attn_59_transpose_y_0, x = var_5363_cast_fp16, y = var_5361_cast_fp16)[name = string("attn_59_cast_fp16")];
+            tensor<int32, [4]> var_5366 = const()[name = string("op_5366"), val = tensor<int32, [4]>([1, 1280, 1, -1])];
+            tensor<fp16, [1, 1280, 1, 1500]> input_233_cast_fp16 = reshape(shape = var_5366, x = attn_59_cast_fp16)[name = string("input_233_cast_fp16")];
+            string var_5376_pad_type_0 = const()[name = string("op_5376_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_5376_strides_0 = const()[name = string("op_5376_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_5376_pad_0 = const()[name = string("op_5376_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_5376_dilations_0 = const()[name = string("op_5376_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_5376_groups_0 = const()[name = string("op_5376_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_29_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(387568256))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(388387520))))[name = string("layers_29_self_attn_o_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1280]> layers_29_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_29_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(388387648)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_5376_cast_fp16 = conv(bias = layers_29_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_5376_dilations_0, groups = var_5376_groups_0, pad = var_5376_pad_0, pad_type = var_5376_pad_type_0, strides = var_5376_strides_0, weight = layers_29_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_233_cast_fp16)[name = string("op_5376_cast_fp16")];
+            string var_5382_pad_type_0 = const()[name = string("op_5382_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_5382_strides_0 = const()[name = string("op_5382_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_5382_pad_0 = const()[name = string("op_5382_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_5382_dilations_0 = const()[name = string("op_5382_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_5382_groups_0 = const()[name = string("op_5382_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_29_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(388402304))), nonzero_data = tensor<fp16, [5965]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(388390272))))[name = string("layers_29_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_5382_cast_fp16 = conv(dilations = var_5382_dilations_0, groups = var_5382_groups_0, pad = var_5382_pad_0, pad_type = var_5382_pad_type_0, strides = var_5382_strides_0, weight = layers_29_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_233_cast_fp16)[name = string("op_5382_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> obj_119_cast_fp16 = add(x = var_5376_cast_fp16, y = var_5382_cast_fp16)[name = string("obj_119_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_119_cast_fp16 = add(x = inputs_117_cast_fp16, y = obj_119_cast_fp16)[name = string("inputs_119_cast_fp16")];
+            tensor<int32, [1]> out_119_axes_0 = const()[name = string("out_119_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_5393_to_fp16 = const()[name = string("op_5393_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> out_119_cast_fp16 = layer_norm(axes = out_119_axes_0, epsilon = var_5393_to_fp16, x = inputs_119_cast_fp16)[name = string("out_119_cast_fp16")];
+            tensor<fp16, [1280]> input_235_gamma_0_to_fp16 = const()[name = string("input_235_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(388607168)))];
+            tensor<fp16, [1280]> input_235_beta_0_to_fp16 = const()[name = string("input_235_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(388609792)))];
+            fp16 input_235_epsilon_0_to_fp16 = const()[name = string("input_235_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_235_cast_fp16 = batch_norm(beta = input_235_beta_0_to_fp16, epsilon = input_235_epsilon_0_to_fp16, gamma = input_235_gamma_0_to_fp16, mean = var_105_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_119_cast_fp16)[name = string("input_235_cast_fp16")];
+            string var_5411_pad_type_0 = const()[name = string("op_5411_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_5411_strides_0 = const()[name = string("op_5411_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_5411_pad_0 = const()[name = string("op_5411_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_5411_dilations_0 = const()[name = string("op_5411_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_5411_groups_0 = const()[name = string("op_5411_groups_0"), val = int32(1)];
+            tensor<fp16, [5120, 1280, 1, 1]> layers_29_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [5120, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(388612416))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(391889280))))[name = string("layers_29_fc1_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [5120]> layers_29_fc1_inlier_module_bias_to_fp16 = const()[name = string("layers_29_fc1_inlier_module_bias_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(391889408)))];
+            tensor<fp16, [1, 5120, 1, 1500]> var_5411_cast_fp16 = conv(bias = layers_29_fc1_inlier_module_bias_to_fp16, dilations = var_5411_dilations_0, groups = var_5411_groups_0, pad = var_5411_pad_0, pad_type = var_5411_pad_type_0, strides = var_5411_strides_0, weight = layers_29_fc1_inlier_module_weight_to_fp16_palettized, x = input_235_cast_fp16)[name = string("op_5411_cast_fp16")];
+            string var_5417_pad_type_0 = const()[name = string("op_5417_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_5417_strides_0 = const()[name = string("op_5417_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_5417_pad_0 = const()[name = string("op_5417_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_5417_dilations_0 = const()[name = string("op_5417_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_5417_groups_0 = const()[name = string("op_5417_groups_0"), val = int32(1)];
+            tensor<fp16, [5120, 1280, 1, 1]> layers_29_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [5120, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(391957888))), nonzero_data = tensor<fp16, [29027]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(391899712))))[name = string("layers_29_fc1_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 5120, 1, 1500]> var_5417_cast_fp16 = conv(dilations = var_5417_dilations_0, groups = var_5417_groups_0, pad = var_5417_pad_0, pad_type = var_5417_pad_type_0, strides = var_5417_strides_0, weight = layers_29_fc1_outlier_module_weight_to_fp16_sparsified, x = input_235_cast_fp16)[name = string("op_5417_cast_fp16")];
+            tensor<fp16, [1, 5120, 1, 1500]> input_237_cast_fp16 = add(x = var_5411_cast_fp16, y = var_5417_cast_fp16)[name = string("input_237_cast_fp16")];
+            string input_239_mode_0 = const()[name = string("input_239_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 5120, 1, 1500]> input_239_cast_fp16 = gelu(mode = input_239_mode_0, x = input_237_cast_fp16)[name = string("input_239_cast_fp16")];
+            string var_5428_pad_type_0 = const()[name = string("op_5428_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_5428_strides_0 = const()[name = string("op_5428_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_5428_pad_0 = const()[name = string("op_5428_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_5428_dilations_0 = const()[name = string("op_5428_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_5428_groups_0 = const()[name = string("op_5428_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 5120, 1, 1]> layers_29_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 5120, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(392777152))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(396054016))))[name = string("layers_29_fc2_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1280]> layers_29_fc2_inlier_module_bias_to_fp16 = const()[name = string("layers_29_fc2_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(396054144)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_5428_cast_fp16 = conv(bias = layers_29_fc2_inlier_module_bias_to_fp16, dilations = var_5428_dilations_0, groups = var_5428_groups_0, pad = var_5428_pad_0, pad_type = var_5428_pad_type_0, strides = var_5428_strides_0, weight = layers_29_fc2_inlier_module_weight_to_fp16_palettized, x = input_239_cast_fp16)[name = string("op_5428_cast_fp16")];
+            string var_5434_pad_type_0 = const()[name = string("op_5434_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_5434_strides_0 = const()[name = string("op_5434_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_5434_pad_0 = const()[name = string("op_5434_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_5434_dilations_0 = const()[name = string("op_5434_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_5434_groups_0 = const()[name = string("op_5434_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 5120, 1, 1]> layers_29_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 5120, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(396118272))), nonzero_data = tensor<fp16, [30702]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(396056768))))[name = string("layers_29_fc2_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_5434_cast_fp16 = conv(dilations = var_5434_dilations_0, groups = var_5434_groups_0, pad = var_5434_pad_0, pad_type = var_5434_pad_type_0, strides = var_5434_strides_0, weight = layers_29_fc2_outlier_module_weight_to_fp16_sparsified, x = input_239_cast_fp16)[name = string("op_5434_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> hidden_states_63_cast_fp16 = add(x = var_5428_cast_fp16, y = var_5434_cast_fp16)[name = string("hidden_states_63_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_121_cast_fp16 = add(x = inputs_119_cast_fp16, y = hidden_states_63_cast_fp16)[name = string("inputs_121_cast_fp16")];
+            int32 var_5444 = const()[name = string("op_5444"), val = int32(3)];
+            tensor<int32, [1]> out_121_axes_0 = const()[name = string("out_121_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_5463_to_fp16 = const()[name = string("op_5463_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> out_121_cast_fp16 = layer_norm(axes = out_121_axes_0, epsilon = var_5463_to_fp16, x = inputs_121_cast_fp16)[name = string("out_121_cast_fp16")];
+            tensor<fp16, [1280]> obj_121_gamma_0_to_fp16 = const()[name = string("obj_121_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(396937536)))];
+            tensor<fp16, [1280]> obj_121_beta_0_to_fp16 = const()[name = string("obj_121_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(396940160)))];
+            fp16 obj_121_epsilon_0_to_fp16 = const()[name = string("obj_121_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> obj_121_cast_fp16 = batch_norm(beta = obj_121_beta_0_to_fp16, epsilon = obj_121_epsilon_0_to_fp16, gamma = obj_121_gamma_0_to_fp16, mean = var_105_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_121_cast_fp16)[name = string("obj_121_cast_fp16")];
+            string var_5485_pad_type_0 = const()[name = string("op_5485_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_5485_strides_0 = const()[name = string("op_5485_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_5485_pad_0 = const()[name = string("op_5485_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_5485_dilations_0 = const()[name = string("op_5485_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_5485_groups_0 = const()[name = string("op_5485_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_30_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(396942784))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(397762048))))[name = string("layers_30_self_attn_q_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1280]> layers_30_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_30_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(397762176)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_5485_cast_fp16 = conv(bias = layers_30_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_5485_dilations_0, groups = var_5485_groups_0, pad = var_5485_pad_0, pad_type = var_5485_pad_type_0, strides = var_5485_strides_0, weight = layers_30_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_121_cast_fp16)[name = string("op_5485_cast_fp16")];
+            string var_5491_pad_type_0 = const()[name = string("op_5491_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_5491_strides_0 = const()[name = string("op_5491_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_5491_pad_0 = const()[name = string("op_5491_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_5491_dilations_0 = const()[name = string("op_5491_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_5491_groups_0 = const()[name = string("op_5491_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_30_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(397784960))), nonzero_data = tensor<fp16, [10020]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(397764800))))[name = string("layers_30_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_5491_cast_fp16 = conv(dilations = var_5491_dilations_0, groups = var_5491_groups_0, pad = var_5491_pad_0, pad_type = var_5491_pad_type_0, strides = var_5491_strides_0, weight = layers_30_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_121_cast_fp16)[name = string("op_5491_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> query_61_cast_fp16 = add(x = var_5485_cast_fp16, y = var_5491_cast_fp16)[name = string("query_61_cast_fp16")];
+            string var_5500_pad_type_0 = const()[name = string("op_5500_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_5500_strides_0 = const()[name = string("op_5500_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_5500_pad_0 = const()[name = string("op_5500_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_5500_dilations_0 = const()[name = string("op_5500_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_5500_groups_0 = const()[name = string("op_5500_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_30_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(397989824))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(398809088))))[name = string("layers_30_self_attn_k_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_5500_cast_fp16 = conv(dilations = var_5500_dilations_0, groups = var_5500_groups_0, pad = var_5500_pad_0, pad_type = var_5500_pad_type_0, strides = var_5500_strides_0, weight = layers_30_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_121_cast_fp16)[name = string("op_5500_cast_fp16")];
+            string var_5506_pad_type_0 = const()[name = string("op_5506_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_5506_strides_0 = const()[name = string("op_5506_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_5506_pad_0 = const()[name = string("op_5506_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_5506_dilations_0 = const()[name = string("op_5506_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_5506_groups_0 = const()[name = string("op_5506_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_30_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(398830400))), nonzero_data = tensor<fp16, [10558]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(398809216))))[name = string("layers_30_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_5506_cast_fp16 = conv(dilations = var_5506_dilations_0, groups = var_5506_groups_0, pad = var_5506_pad_0, pad_type = var_5506_pad_type_0, strides = var_5506_strides_0, weight = layers_30_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_121_cast_fp16)[name = string("op_5506_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> key_61_cast_fp16 = add(x = var_5500_cast_fp16, y = var_5506_cast_fp16)[name = string("key_61_cast_fp16")];
+            string var_5516_pad_type_0 = const()[name = string("op_5516_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_5516_strides_0 = const()[name = string("op_5516_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_5516_pad_0 = const()[name = string("op_5516_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_5516_dilations_0 = const()[name = string("op_5516_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_5516_groups_0 = const()[name = string("op_5516_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_30_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(399035264))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(399854528))))[name = string("layers_30_self_attn_v_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1280]> layers_30_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_30_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(399854656)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_5516_cast_fp16 = conv(bias = layers_30_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_5516_dilations_0, groups = var_5516_groups_0, pad = var_5516_pad_0, pad_type = var_5516_pad_type_0, strides = var_5516_strides_0, weight = layers_30_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_121_cast_fp16)[name = string("op_5516_cast_fp16")];
+            string var_5522_pad_type_0 = const()[name = string("op_5522_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_5522_strides_0 = const()[name = string("op_5522_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_5522_pad_0 = const()[name = string("op_5522_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_5522_dilations_0 = const()[name = string("op_5522_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_5522_groups_0 = const()[name = string("op_5522_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_30_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(399869696))), nonzero_data = tensor<fp16, [6176]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(399857280))))[name = string("layers_30_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_5522_cast_fp16 = conv(dilations = var_5522_dilations_0, groups = var_5522_groups_0, pad = var_5522_pad_0, pad_type = var_5522_pad_type_0, strides = var_5522_strides_0, weight = layers_30_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_121_cast_fp16)[name = string("op_5522_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> value_61_cast_fp16 = add(x = var_5516_cast_fp16, y = var_5522_cast_fp16)[name = string("value_61_cast_fp16")];
+            tensor<int32, [4]> var_5525 = const()[name = string("op_5525"), val = tensor<int32, [4]>([1, 20, 64, -1])];
+            tensor<fp16, [1, 20, 64, 1500]> mh_q_61_cast_fp16 = reshape(shape = var_5525, x = query_61_cast_fp16)[name = string("mh_q_61_cast_fp16")];
+            fp16 var_5527_to_fp16 = const()[name = string("op_5527_to_fp16"), val = fp16(0x1p-3)];
+            tensor<fp16, [1, 20, 64, 1500]> var_5528_cast_fp16 = mul(x = mh_q_61_cast_fp16, y = var_5527_to_fp16)[name = string("op_5528_cast_fp16")];
+            tensor<int32, [4]> var_5529 = const()[name = string("op_5529"), val = tensor<int32, [4]>([1, 20, 64, -1])];
+            tensor<fp16, [1, 20, 64, 1500]> var_5530_cast_fp16 = reshape(shape = var_5529, x = key_61_cast_fp16)[name = string("op_5530_cast_fp16")];
+            bool mh_w_61_transpose_x_0 = const()[name = string("mh_w_61_transpose_x_0"), val = bool(true)];
+            bool mh_w_61_transpose_y_0 = const()[name = string("mh_w_61_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, 1500, 1500]> mh_w_61_cast_fp16 = matmul(transpose_x = mh_w_61_transpose_x_0, transpose_y = mh_w_61_transpose_y_0, x = var_5528_cast_fp16, y = var_5530_cast_fp16)[name = string("mh_w_61_cast_fp16")];
+            tensor<fp16, [1, 20, 1500, 1500]> var_5533_cast_fp16 = softmax(axis = var_5444, x = mh_w_61_cast_fp16)[name = string("op_5533_cast_fp16")];
+            tensor<int32, [4]> var_5534 = const()[name = string("op_5534"), val = tensor<int32, [4]>([1, 20, 64, -1])];
+            tensor<fp16, [1, 20, 64, 1500]> var_5535_cast_fp16 = reshape(shape = var_5534, x = value_61_cast_fp16)[name = string("op_5535_cast_fp16")];
+            bool attn_61_transpose_x_0 = const()[name = string("attn_61_transpose_x_0"), val = bool(false)];
+            bool attn_61_transpose_y_0 = const()[name = string("attn_61_transpose_y_0"), val = bool(true)];
+            tensor<fp16, [1, 20, 64, 1500]> attn_61_cast_fp16 = matmul(transpose_x = attn_61_transpose_x_0, transpose_y = attn_61_transpose_y_0, x = var_5535_cast_fp16, y = var_5533_cast_fp16)[name = string("attn_61_cast_fp16")];
+            tensor<int32, [4]> var_5538 = const()[name = string("op_5538"), val = tensor<int32, [4]>([1, 1280, 1, -1])];
+            tensor<fp16, [1, 1280, 1, 1500]> input_241_cast_fp16 = reshape(shape = var_5538, x = attn_61_cast_fp16)[name = string("input_241_cast_fp16")];
+            string var_5548_pad_type_0 = const()[name = string("op_5548_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_5548_strides_0 = const()[name = string("op_5548_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_5548_pad_0 = const()[name = string("op_5548_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_5548_dilations_0 = const()[name = string("op_5548_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_5548_groups_0 = const()[name = string("op_5548_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_30_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(400074560))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(400893824))))[name = string("layers_30_self_attn_o_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1280]> layers_30_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_30_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(400893952)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_5548_cast_fp16 = conv(bias = layers_30_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_5548_dilations_0, groups = var_5548_groups_0, pad = var_5548_pad_0, pad_type = var_5548_pad_type_0, strides = var_5548_strides_0, weight = layers_30_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_241_cast_fp16)[name = string("op_5548_cast_fp16")];
+            string var_5554_pad_type_0 = const()[name = string("op_5554_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_5554_strides_0 = const()[name = string("op_5554_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_5554_pad_0 = const()[name = string("op_5554_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_5554_dilations_0 = const()[name = string("op_5554_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_5554_groups_0 = const()[name = string("op_5554_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_30_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(400908032))), nonzero_data = tensor<fp16, [5673]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(400896576))))[name = string("layers_30_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_5554_cast_fp16 = conv(dilations = var_5554_dilations_0, groups = var_5554_groups_0, pad = var_5554_pad_0, pad_type = var_5554_pad_type_0, strides = var_5554_strides_0, weight = layers_30_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_241_cast_fp16)[name = string("op_5554_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> obj_123_cast_fp16 = add(x = var_5548_cast_fp16, y = var_5554_cast_fp16)[name = string("obj_123_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_123_cast_fp16 = add(x = inputs_121_cast_fp16, y = obj_123_cast_fp16)[name = string("inputs_123_cast_fp16")];
+            tensor<int32, [1]> out_123_axes_0 = const()[name = string("out_123_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_5565_to_fp16 = const()[name = string("op_5565_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> out_123_cast_fp16 = layer_norm(axes = out_123_axes_0, epsilon = var_5565_to_fp16, x = inputs_123_cast_fp16)[name = string("out_123_cast_fp16")];
+            tensor<fp16, [1280]> input_243_gamma_0_to_fp16 = const()[name = string("input_243_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(401112896)))];
+            tensor<fp16, [1280]> input_243_beta_0_to_fp16 = const()[name = string("input_243_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(401115520)))];
+            fp16 input_243_epsilon_0_to_fp16 = const()[name = string("input_243_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_243_cast_fp16 = batch_norm(beta = input_243_beta_0_to_fp16, epsilon = input_243_epsilon_0_to_fp16, gamma = input_243_gamma_0_to_fp16, mean = var_105_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_123_cast_fp16)[name = string("input_243_cast_fp16")];
+            string var_5583_pad_type_0 = const()[name = string("op_5583_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_5583_strides_0 = const()[name = string("op_5583_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_5583_pad_0 = const()[name = string("op_5583_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_5583_dilations_0 = const()[name = string("op_5583_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_5583_groups_0 = const()[name = string("op_5583_groups_0"), val = int32(1)];
+            tensor<fp16, [5120, 1280, 1, 1]> layers_30_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [5120, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(401118144))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(404395008))))[name = string("layers_30_fc1_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [5120]> layers_30_fc1_inlier_module_bias_to_fp16 = const()[name = string("layers_30_fc1_inlier_module_bias_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(404395136)))];
+            tensor<fp16, [1, 5120, 1, 1500]> var_5583_cast_fp16 = conv(bias = layers_30_fc1_inlier_module_bias_to_fp16, dilations = var_5583_dilations_0, groups = var_5583_groups_0, pad = var_5583_pad_0, pad_type = var_5583_pad_type_0, strides = var_5583_strides_0, weight = layers_30_fc1_inlier_module_weight_to_fp16_palettized, x = input_243_cast_fp16)[name = string("op_5583_cast_fp16")];
+            string var_5589_pad_type_0 = const()[name = string("op_5589_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_5589_strides_0 = const()[name = string("op_5589_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_5589_pad_0 = const()[name = string("op_5589_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_5589_dilations_0 = const()[name = string("op_5589_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_5589_groups_0 = const()[name = string("op_5589_groups_0"), val = int32(1)];
+            tensor<fp16, [5120, 1280, 1, 1]> layers_30_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [5120, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(404464960))), nonzero_data = tensor<fp16, [29723]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(404405440))))[name = string("layers_30_fc1_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 5120, 1, 1500]> var_5589_cast_fp16 = conv(dilations = var_5589_dilations_0, groups = var_5589_groups_0, pad = var_5589_pad_0, pad_type = var_5589_pad_type_0, strides = var_5589_strides_0, weight = layers_30_fc1_outlier_module_weight_to_fp16_sparsified, x = input_243_cast_fp16)[name = string("op_5589_cast_fp16")];
+            tensor<fp16, [1, 5120, 1, 1500]> input_245_cast_fp16 = add(x = var_5583_cast_fp16, y = var_5589_cast_fp16)[name = string("input_245_cast_fp16")];
+            string input_247_mode_0 = const()[name = string("input_247_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 5120, 1, 1500]> input_247_cast_fp16 = gelu(mode = input_247_mode_0, x = input_245_cast_fp16)[name = string("input_247_cast_fp16")];
+            string var_5600_pad_type_0 = const()[name = string("op_5600_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_5600_strides_0 = const()[name = string("op_5600_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_5600_pad_0 = const()[name = string("op_5600_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_5600_dilations_0 = const()[name = string("op_5600_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_5600_groups_0 = const()[name = string("op_5600_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 5120, 1, 1]> layers_30_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 5120, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(405284224))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(408561088))))[name = string("layers_30_fc2_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1280]> layers_30_fc2_inlier_module_bias_to_fp16 = const()[name = string("layers_30_fc2_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(408561216)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_5600_cast_fp16 = conv(bias = layers_30_fc2_inlier_module_bias_to_fp16, dilations = var_5600_dilations_0, groups = var_5600_groups_0, pad = var_5600_pad_0, pad_type = var_5600_pad_type_0, strides = var_5600_strides_0, weight = layers_30_fc2_inlier_module_weight_to_fp16_palettized, x = input_247_cast_fp16)[name = string("op_5600_cast_fp16")];
+            string var_5606_pad_type_0 = const()[name = string("op_5606_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_5606_strides_0 = const()[name = string("op_5606_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_5606_pad_0 = const()[name = string("op_5606_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_5606_dilations_0 = const()[name = string("op_5606_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_5606_groups_0 = const()[name = string("op_5606_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 5120, 1, 1]> layers_30_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 5120, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(408624832))), nonzero_data = tensor<fp16, [30449]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(408563840))))[name = string("layers_30_fc2_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_5606_cast_fp16 = conv(dilations = var_5606_dilations_0, groups = var_5606_groups_0, pad = var_5606_pad_0, pad_type = var_5606_pad_type_0, strides = var_5606_strides_0, weight = layers_30_fc2_outlier_module_weight_to_fp16_sparsified, x = input_247_cast_fp16)[name = string("op_5606_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> hidden_states_65_cast_fp16 = add(x = var_5600_cast_fp16, y = var_5606_cast_fp16)[name = string("hidden_states_65_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_125_cast_fp16 = add(x = inputs_123_cast_fp16, y = hidden_states_65_cast_fp16)[name = string("inputs_125_cast_fp16")];
+            int32 var_5616 = const()[name = string("op_5616"), val = int32(3)];
+            tensor<int32, [1]> out_125_axes_0 = const()[name = string("out_125_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_5635_to_fp16 = const()[name = string("op_5635_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> out_125_cast_fp16 = layer_norm(axes = out_125_axes_0, epsilon = var_5635_to_fp16, x = inputs_125_cast_fp16)[name = string("out_125_cast_fp16")];
+            tensor<fp16, [1280]> obj_125_gamma_0_to_fp16 = const()[name = string("obj_125_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(409444096)))];
+            tensor<fp16, [1280]> obj_125_beta_0_to_fp16 = const()[name = string("obj_125_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(409446720)))];
+            fp16 obj_125_epsilon_0_to_fp16 = const()[name = string("obj_125_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> obj_125_cast_fp16 = batch_norm(beta = obj_125_beta_0_to_fp16, epsilon = obj_125_epsilon_0_to_fp16, gamma = obj_125_gamma_0_to_fp16, mean = var_105_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_125_cast_fp16)[name = string("obj_125_cast_fp16")];
+            string var_5657_pad_type_0 = const()[name = string("op_5657_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_5657_strides_0 = const()[name = string("op_5657_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_5657_pad_0 = const()[name = string("op_5657_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_5657_dilations_0 = const()[name = string("op_5657_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_5657_groups_0 = const()[name = string("op_5657_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_31_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(409449344))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(410268608))))[name = string("layers_31_self_attn_q_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1280]> layers_31_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_31_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(410268736)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_5657_cast_fp16 = conv(bias = layers_31_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_5657_dilations_0, groups = var_5657_groups_0, pad = var_5657_pad_0, pad_type = var_5657_pad_type_0, strides = var_5657_strides_0, weight = layers_31_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_125_cast_fp16)[name = string("op_5657_cast_fp16")];
+            string var_5663_pad_type_0 = const()[name = string("op_5663_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_5663_strides_0 = const()[name = string("op_5663_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_5663_pad_0 = const()[name = string("op_5663_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_5663_dilations_0 = const()[name = string("op_5663_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_5663_groups_0 = const()[name = string("op_5663_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_31_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(410290240))), nonzero_data = tensor<fp16, [9377]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(410271360))))[name = string("layers_31_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_5663_cast_fp16 = conv(dilations = var_5663_dilations_0, groups = var_5663_groups_0, pad = var_5663_pad_0, pad_type = var_5663_pad_type_0, strides = var_5663_strides_0, weight = layers_31_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_125_cast_fp16)[name = string("op_5663_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> query_cast_fp16 = add(x = var_5657_cast_fp16, y = var_5663_cast_fp16)[name = string("query_cast_fp16")];
+            string var_5672_pad_type_0 = const()[name = string("op_5672_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_5672_strides_0 = const()[name = string("op_5672_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_5672_pad_0 = const()[name = string("op_5672_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_5672_dilations_0 = const()[name = string("op_5672_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_5672_groups_0 = const()[name = string("op_5672_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_31_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(410495104))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(411314368))))[name = string("layers_31_self_attn_k_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_5672_cast_fp16 = conv(dilations = var_5672_dilations_0, groups = var_5672_groups_0, pad = var_5672_pad_0, pad_type = var_5672_pad_type_0, strides = var_5672_strides_0, weight = layers_31_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_125_cast_fp16)[name = string("op_5672_cast_fp16")];
+            string var_5678_pad_type_0 = const()[name = string("op_5678_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_5678_strides_0 = const()[name = string("op_5678_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_5678_pad_0 = const()[name = string("op_5678_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_5678_dilations_0 = const()[name = string("op_5678_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_5678_groups_0 = const()[name = string("op_5678_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_31_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(411336768))), nonzero_data = tensor<fp16, [11093]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(411314496))))[name = string("layers_31_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_5678_cast_fp16 = conv(dilations = var_5678_dilations_0, groups = var_5678_groups_0, pad = var_5678_pad_0, pad_type = var_5678_pad_type_0, strides = var_5678_strides_0, weight = layers_31_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_125_cast_fp16)[name = string("op_5678_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> key_cast_fp16 = add(x = var_5672_cast_fp16, y = var_5678_cast_fp16)[name = string("key_cast_fp16")];
+            string var_5688_pad_type_0 = const()[name = string("op_5688_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_5688_strides_0 = const()[name = string("op_5688_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_5688_pad_0 = const()[name = string("op_5688_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_5688_dilations_0 = const()[name = string("op_5688_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_5688_groups_0 = const()[name = string("op_5688_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_31_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(411541632))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(412360896))))[name = string("layers_31_self_attn_v_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1280]> layers_31_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_31_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(412361024)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_5688_cast_fp16 = conv(bias = layers_31_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_5688_dilations_0, groups = var_5688_groups_0, pad = var_5688_pad_0, pad_type = var_5688_pad_type_0, strides = var_5688_strides_0, weight = layers_31_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_125_cast_fp16)[name = string("op_5688_cast_fp16")];
+            string var_5694_pad_type_0 = const()[name = string("op_5694_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_5694_strides_0 = const()[name = string("op_5694_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_5694_pad_0 = const()[name = string("op_5694_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_5694_dilations_0 = const()[name = string("op_5694_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_5694_groups_0 = const()[name = string("op_5694_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_31_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(412376512))), nonzero_data = tensor<fp16, [6384]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(412363648))))[name = string("layers_31_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_5694_cast_fp16 = conv(dilations = var_5694_dilations_0, groups = var_5694_groups_0, pad = var_5694_pad_0, pad_type = var_5694_pad_type_0, strides = var_5694_strides_0, weight = layers_31_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_125_cast_fp16)[name = string("op_5694_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> value_cast_fp16 = add(x = var_5688_cast_fp16, y = var_5694_cast_fp16)[name = string("value_cast_fp16")];
+            tensor<int32, [4]> var_5697 = const()[name = string("op_5697"), val = tensor<int32, [4]>([1, 20, 64, -1])];
+            tensor<fp16, [1, 20, 64, 1500]> mh_q_cast_fp16 = reshape(shape = var_5697, x = query_cast_fp16)[name = string("mh_q_cast_fp16")];
+            fp16 var_5699_to_fp16 = const()[name = string("op_5699_to_fp16"), val = fp16(0x1p-3)];
+            tensor<fp16, [1, 20, 64, 1500]> var_5700_cast_fp16 = mul(x = mh_q_cast_fp16, y = var_5699_to_fp16)[name = string("op_5700_cast_fp16")];
+            tensor<int32, [4]> var_5701 = const()[name = string("op_5701"), val = tensor<int32, [4]>([1, 20, 64, -1])];
+            tensor<fp16, [1, 20, 64, 1500]> var_5702_cast_fp16 = reshape(shape = var_5701, x = key_cast_fp16)[name = string("op_5702_cast_fp16")];
+            bool mh_w_transpose_x_0 = const()[name = string("mh_w_transpose_x_0"), val = bool(true)];
+            bool mh_w_transpose_y_0 = const()[name = string("mh_w_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, 1500, 1500]> mh_w_cast_fp16 = matmul(transpose_x = mh_w_transpose_x_0, transpose_y = mh_w_transpose_y_0, x = var_5700_cast_fp16, y = var_5702_cast_fp16)[name = string("mh_w_cast_fp16")];
+            tensor<fp16, [1, 20, 1500, 1500]> var_5705_cast_fp16 = softmax(axis = var_5616, x = mh_w_cast_fp16)[name = string("op_5705_cast_fp16")];
+            tensor<int32, [4]> var_5706 = const()[name = string("op_5706"), val = tensor<int32, [4]>([1, 20, 64, -1])];
+            tensor<fp16, [1, 20, 64, 1500]> var_5707_cast_fp16 = reshape(shape = var_5706, x = value_cast_fp16)[name = string("op_5707_cast_fp16")];
+            bool attn_transpose_x_0 = const()[name = string("attn_transpose_x_0"), val = bool(false)];
+            bool attn_transpose_y_0 = const()[name = string("attn_transpose_y_0"), val = bool(true)];
+            tensor<fp16, [1, 20, 64, 1500]> attn_cast_fp16 = matmul(transpose_x = attn_transpose_x_0, transpose_y = attn_transpose_y_0, x = var_5707_cast_fp16, y = var_5705_cast_fp16)[name = string("attn_cast_fp16")];
+            tensor<int32, [4]> var_5710 = const()[name = string("op_5710"), val = tensor<int32, [4]>([1, 1280, 1, -1])];
+            tensor<fp16, [1, 1280, 1, 1500]> input_249_cast_fp16 = reshape(shape = var_5710, x = attn_cast_fp16)[name = string("input_249_cast_fp16")];
+            string var_5720_pad_type_0 = const()[name = string("op_5720_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_5720_strides_0 = const()[name = string("op_5720_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_5720_pad_0 = const()[name = string("op_5720_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_5720_dilations_0 = const()[name = string("op_5720_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_5720_groups_0 = const()[name = string("op_5720_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_31_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(412581376))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(413400640))))[name = string("layers_31_self_attn_o_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1280]> layers_31_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_31_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(413400768)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_5720_cast_fp16 = conv(bias = layers_31_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_5720_dilations_0, groups = var_5720_groups_0, pad = var_5720_pad_0, pad_type = var_5720_pad_type_0, strides = var_5720_strides_0, weight = layers_31_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_249_cast_fp16)[name = string("op_5720_cast_fp16")];
+            string var_5726_pad_type_0 = const()[name = string("op_5726_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_5726_strides_0 = const()[name = string("op_5726_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_5726_pad_0 = const()[name = string("op_5726_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_5726_dilations_0 = const()[name = string("op_5726_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_5726_groups_0 = const()[name = string("op_5726_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_31_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(413416000))), nonzero_data = tensor<fp16, [6256]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(413403392))))[name = string("layers_31_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_5726_cast_fp16 = conv(dilations = var_5726_dilations_0, groups = var_5726_groups_0, pad = var_5726_pad_0, pad_type = var_5726_pad_type_0, strides = var_5726_strides_0, weight = layers_31_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_249_cast_fp16)[name = string("op_5726_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> obj_cast_fp16 = add(x = var_5720_cast_fp16, y = var_5726_cast_fp16)[name = string("obj_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_127_cast_fp16 = add(x = inputs_125_cast_fp16, y = obj_cast_fp16)[name = string("inputs_127_cast_fp16")];
+            tensor<int32, [1]> out_127_axes_0 = const()[name = string("out_127_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_5737_to_fp16 = const()[name = string("op_5737_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> out_127_cast_fp16 = layer_norm(axes = out_127_axes_0, epsilon = var_5737_to_fp16, x = inputs_127_cast_fp16)[name = string("out_127_cast_fp16")];
+            tensor<fp16, [1280]> input_251_gamma_0_to_fp16 = const()[name = string("input_251_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(413620864)))];
+            tensor<fp16, [1280]> input_251_beta_0_to_fp16 = const()[name = string("input_251_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(413623488)))];
+            fp16 input_251_epsilon_0_to_fp16 = const()[name = string("input_251_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> input_251_cast_fp16 = batch_norm(beta = input_251_beta_0_to_fp16, epsilon = input_251_epsilon_0_to_fp16, gamma = input_251_gamma_0_to_fp16, mean = var_105_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_127_cast_fp16)[name = string("input_251_cast_fp16")];
+            string var_5755_pad_type_0 = const()[name = string("op_5755_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_5755_strides_0 = const()[name = string("op_5755_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_5755_pad_0 = const()[name = string("op_5755_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_5755_dilations_0 = const()[name = string("op_5755_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_5755_groups_0 = const()[name = string("op_5755_groups_0"), val = int32(1)];
+            tensor<fp16, [5120, 1280, 1, 1]> layers_31_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [5120, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(413626112))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(416902976))))[name = string("layers_31_fc1_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [5120]> layers_31_fc1_inlier_module_bias_to_fp16 = const()[name = string("layers_31_fc1_inlier_module_bias_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(416903104)))];
+            tensor<fp16, [1, 5120, 1, 1500]> var_5755_cast_fp16 = conv(bias = layers_31_fc1_inlier_module_bias_to_fp16, dilations = var_5755_dilations_0, groups = var_5755_groups_0, pad = var_5755_pad_0, pad_type = var_5755_pad_type_0, strides = var_5755_strides_0, weight = layers_31_fc1_inlier_module_weight_to_fp16_palettized, x = input_251_cast_fp16)[name = string("op_5755_cast_fp16")];
+            string var_5761_pad_type_0 = const()[name = string("op_5761_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_5761_strides_0 = const()[name = string("op_5761_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_5761_pad_0 = const()[name = string("op_5761_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_5761_dilations_0 = const()[name = string("op_5761_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_5761_groups_0 = const()[name = string("op_5761_groups_0"), val = int32(1)];
+            tensor<fp16, [5120, 1280, 1, 1]> layers_31_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [5120, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(416975104))), nonzero_data = tensor<fp16, [30796]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(416913408))))[name = string("layers_31_fc1_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 5120, 1, 1500]> var_5761_cast_fp16 = conv(dilations = var_5761_dilations_0, groups = var_5761_groups_0, pad = var_5761_pad_0, pad_type = var_5761_pad_type_0, strides = var_5761_strides_0, weight = layers_31_fc1_outlier_module_weight_to_fp16_sparsified, x = input_251_cast_fp16)[name = string("op_5761_cast_fp16")];
+            tensor<fp16, [1, 5120, 1, 1500]> input_253_cast_fp16 = add(x = var_5755_cast_fp16, y = var_5761_cast_fp16)[name = string("input_253_cast_fp16")];
+            string input_255_mode_0 = const()[name = string("input_255_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 5120, 1, 1500]> input_255_cast_fp16 = gelu(mode = input_255_mode_0, x = input_253_cast_fp16)[name = string("input_255_cast_fp16")];
+            string var_5772_pad_type_0 = const()[name = string("op_5772_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_5772_strides_0 = const()[name = string("op_5772_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_5772_pad_0 = const()[name = string("op_5772_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_5772_dilations_0 = const()[name = string("op_5772_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_5772_groups_0 = const()[name = string("op_5772_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 5120, 1, 1]> layers_31_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 5120, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(417794368))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(421071232))))[name = string("layers_31_fc2_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1280]> layers_31_fc2_inlier_module_bias_to_fp16 = const()[name = string("layers_31_fc2_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(421071360)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_5772_cast_fp16 = conv(bias = layers_31_fc2_inlier_module_bias_to_fp16, dilations = var_5772_dilations_0, groups = var_5772_groups_0, pad = var_5772_pad_0, pad_type = var_5772_pad_type_0, strides = var_5772_strides_0, weight = layers_31_fc2_inlier_module_weight_to_fp16_palettized, x = input_255_cast_fp16)[name = string("op_5772_cast_fp16")];
+            string var_5778_pad_type_0 = const()[name = string("op_5778_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_5778_strides_0 = const()[name = string("op_5778_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_5778_pad_0 = const()[name = string("op_5778_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_5778_dilations_0 = const()[name = string("op_5778_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_5778_groups_0 = const()[name = string("op_5778_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 5120, 1, 1]> layers_31_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 5120, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(421144256))), nonzero_data = tensor<fp16, [35103]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(421073984))))[name = string("layers_31_fc2_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_5778_cast_fp16 = conv(dilations = var_5778_dilations_0, groups = var_5778_groups_0, pad = var_5778_pad_0, pad_type = var_5778_pad_type_0, strides = var_5778_strides_0, weight = layers_31_fc2_outlier_module_weight_to_fp16_sparsified, x = input_255_cast_fp16)[name = string("op_5778_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> hidden_states_cast_fp16 = add(x = var_5772_cast_fp16, y = var_5778_cast_fp16)[name = string("hidden_states_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> inputs_cast_fp16 = add(x = inputs_127_cast_fp16, y = hidden_states_cast_fp16)[name = string("inputs_cast_fp16")];
+            tensor<int32, [1]> out_axes_0 = const()[name = string("out_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_5793_to_fp16 = const()[name = string("op_5793_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> out_cast_fp16 = layer_norm(axes = out_axes_0, epsilon = var_5793_to_fp16, x = inputs_cast_fp16)[name = string("out_cast_fp16")];
+            tensor<fp16, [1280]> encoder_output_embeds_type_fp32_gamma_0_to_fp16 = const()[name = string("encoder_output_embeds_type_fp32_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(421963520)))];
+            tensor<fp16, [1280]> encoder_output_embeds_type_fp32_beta_0_to_fp16 = const()[name = string("encoder_output_embeds_type_fp32_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(421966144)))];
+            fp16 encoder_output_embeds_type_fp32_epsilon_0_to_fp16 = const()[name = string("encoder_output_embeds_type_fp32_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1500]> encoder_output_embeds = batch_norm(beta = encoder_output_embeds_type_fp32_beta_0_to_fp16, epsilon = encoder_output_embeds_type_fp32_epsilon_0_to_fp16, gamma = encoder_output_embeds_type_fp32_gamma_0_to_fp16, mean = var_105_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_cast_fp16)[name = string("encoder_output_embeds_type_fp32_cast_fp16")];
+            string var_5819_pad_type_0 = const()[name = string("op_5819_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_5819_strides_0 = const()[name = string("op_5819_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_5819_pad_0 = const()[name = string("op_5819_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_5819_dilations_0 = const()[name = string("op_5819_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_5819_groups_0 = const()[name = string("op_5819_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> decoder_kv_cache_prep_0_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(421968768))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(422788032))))[name = string("decoder_kv_cache_prep_0_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_5819_cast_fp16 = conv(dilations = var_5819_dilations_0, groups = var_5819_groups_0, pad = var_5819_pad_0, pad_type = var_5819_pad_type_0, strides = var_5819_strides_0, weight = decoder_kv_cache_prep_0_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = encoder_output_embeds)[name = string("op_5819_cast_fp16")];
+            string var_5825_pad_type_0 = const()[name = string("op_5825_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_5825_strides_0 = const()[name = string("op_5825_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_5825_pad_0 = const()[name = string("op_5825_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_5825_dilations_0 = const()[name = string("op_5825_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_5825_groups_0 = const()[name = string("op_5825_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> decoder_kv_cache_prep_0_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(422848128))), nonzero_data = tensor<fp16, [29949]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(422788160))))[name = string("decoder_kv_cache_prep_0_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_5825_cast_fp16 = conv(dilations = var_5825_dilations_0, groups = var_5825_groups_0, pad = var_5825_pad_0, pad_type = var_5825_pad_type_0, strides = var_5825_strides_0, weight = decoder_kv_cache_prep_0_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = encoder_output_embeds)[name = string("op_5825_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_5826_cast_fp16 = add(x = var_5819_cast_fp16, y = var_5825_cast_fp16)[name = string("op_5826_cast_fp16")];
+            string var_5835_pad_type_0 = const()[name = string("op_5835_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_5835_strides_0 = const()[name = string("op_5835_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_5835_pad_0 = const()[name = string("op_5835_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_5835_dilations_0 = const()[name = string("op_5835_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_5835_groups_0 = const()[name = string("op_5835_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> decoder_kv_cache_prep_0_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(423052992))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(423872256))))[name = string("decoder_kv_cache_prep_0_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1280]> decoder_kv_cache_prep_0_encoder_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("decoder_kv_cache_prep_0_encoder_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(423872384)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_5835_cast_fp16 = conv(bias = decoder_kv_cache_prep_0_encoder_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_5835_dilations_0, groups = var_5835_groups_0, pad = var_5835_pad_0, pad_type = var_5835_pad_type_0, strides = var_5835_strides_0, weight = decoder_kv_cache_prep_0_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = encoder_output_embeds)[name = string("op_5835_cast_fp16")];
+            string var_5841_pad_type_0 = const()[name = string("op_5841_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_5841_strides_0 = const()[name = string("op_5841_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_5841_pad_0 = const()[name = string("op_5841_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_5841_dilations_0 = const()[name = string("op_5841_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_5841_groups_0 = const()[name = string("op_5841_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> decoder_kv_cache_prep_0_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(423886272))), nonzero_data = tensor<fp16, [5596]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(423875008))))[name = string("decoder_kv_cache_prep_0_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_5841_cast_fp16 = conv(dilations = var_5841_dilations_0, groups = var_5841_groups_0, pad = var_5841_pad_0, pad_type = var_5841_pad_type_0, strides = var_5841_strides_0, weight = decoder_kv_cache_prep_0_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = encoder_output_embeds)[name = string("op_5841_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_5842_cast_fp16 = add(x = var_5835_cast_fp16, y = var_5841_cast_fp16)[name = string("op_5842_cast_fp16")];
+            string var_5862_pad_type_0 = const()[name = string("op_5862_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_5862_strides_0 = const()[name = string("op_5862_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_5862_pad_0 = const()[name = string("op_5862_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_5862_dilations_0 = const()[name = string("op_5862_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_5862_groups_0 = const()[name = string("op_5862_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> decoder_kv_cache_prep_1_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(424091136))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(424910400))))[name = string("decoder_kv_cache_prep_1_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_5862_cast_fp16 = conv(dilations = var_5862_dilations_0, groups = var_5862_groups_0, pad = var_5862_pad_0, pad_type = var_5862_pad_type_0, strides = var_5862_strides_0, weight = decoder_kv_cache_prep_1_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = encoder_output_embeds)[name = string("op_5862_cast_fp16")];
+            string var_5868_pad_type_0 = const()[name = string("op_5868_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_5868_strides_0 = const()[name = string("op_5868_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_5868_pad_0 = const()[name = string("op_5868_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_5868_dilations_0 = const()[name = string("op_5868_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_5868_groups_0 = const()[name = string("op_5868_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> decoder_kv_cache_prep_1_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(424953984))), nonzero_data = tensor<fp16, [21667]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(424910528))))[name = string("decoder_kv_cache_prep_1_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_5868_cast_fp16 = conv(dilations = var_5868_dilations_0, groups = var_5868_groups_0, pad = var_5868_pad_0, pad_type = var_5868_pad_type_0, strides = var_5868_strides_0, weight = decoder_kv_cache_prep_1_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = encoder_output_embeds)[name = string("op_5868_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_5869_cast_fp16 = add(x = var_5862_cast_fp16, y = var_5868_cast_fp16)[name = string("op_5869_cast_fp16")];
+            string var_5878_pad_type_0 = const()[name = string("op_5878_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_5878_strides_0 = const()[name = string("op_5878_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_5878_pad_0 = const()[name = string("op_5878_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_5878_dilations_0 = const()[name = string("op_5878_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_5878_groups_0 = const()[name = string("op_5878_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> decoder_kv_cache_prep_1_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(425158848))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(425978112))))[name = string("decoder_kv_cache_prep_1_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1280]> decoder_kv_cache_prep_1_encoder_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("decoder_kv_cache_prep_1_encoder_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(425978240)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_5878_cast_fp16 = conv(bias = decoder_kv_cache_prep_1_encoder_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_5878_dilations_0, groups = var_5878_groups_0, pad = var_5878_pad_0, pad_type = var_5878_pad_type_0, strides = var_5878_strides_0, weight = decoder_kv_cache_prep_1_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = encoder_output_embeds)[name = string("op_5878_cast_fp16")];
+            string var_5884_pad_type_0 = const()[name = string("op_5884_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_5884_strides_0 = const()[name = string("op_5884_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_5884_pad_0 = const()[name = string("op_5884_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_5884_dilations_0 = const()[name = string("op_5884_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_5884_groups_0 = const()[name = string("op_5884_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> decoder_kv_cache_prep_1_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(425992064))), nonzero_data = tensor<fp16, [5557]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(425980864))))[name = string("decoder_kv_cache_prep_1_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_5884_cast_fp16 = conv(dilations = var_5884_dilations_0, groups = var_5884_groups_0, pad = var_5884_pad_0, pad_type = var_5884_pad_type_0, strides = var_5884_strides_0, weight = decoder_kv_cache_prep_1_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = encoder_output_embeds)[name = string("op_5884_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_5885_cast_fp16 = add(x = var_5878_cast_fp16, y = var_5884_cast_fp16)[name = string("op_5885_cast_fp16")];
+            string var_5905_pad_type_0 = const()[name = string("op_5905_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_5905_strides_0 = const()[name = string("op_5905_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_5905_pad_0 = const()[name = string("op_5905_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_5905_dilations_0 = const()[name = string("op_5905_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_5905_groups_0 = const()[name = string("op_5905_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> decoder_kv_cache_prep_2_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(426196928))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(427016192))))[name = string("decoder_kv_cache_prep_2_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_5905_cast_fp16 = conv(dilations = var_5905_dilations_0, groups = var_5905_groups_0, pad = var_5905_pad_0, pad_type = var_5905_pad_type_0, strides = var_5905_strides_0, weight = decoder_kv_cache_prep_2_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = encoder_output_embeds)[name = string("op_5905_cast_fp16")];
+            string var_5911_pad_type_0 = const()[name = string("op_5911_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_5911_strides_0 = const()[name = string("op_5911_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_5911_pad_0 = const()[name = string("op_5911_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_5911_dilations_0 = const()[name = string("op_5911_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_5911_groups_0 = const()[name = string("op_5911_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> decoder_kv_cache_prep_2_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(427044160))), nonzero_data = tensor<fp16, [13879]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(427016320))))[name = string("decoder_kv_cache_prep_2_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_5911_cast_fp16 = conv(dilations = var_5911_dilations_0, groups = var_5911_groups_0, pad = var_5911_pad_0, pad_type = var_5911_pad_type_0, strides = var_5911_strides_0, weight = decoder_kv_cache_prep_2_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = encoder_output_embeds)[name = string("op_5911_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_5912_cast_fp16 = add(x = var_5905_cast_fp16, y = var_5911_cast_fp16)[name = string("op_5912_cast_fp16")];
+            string var_5921_pad_type_0 = const()[name = string("op_5921_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_5921_strides_0 = const()[name = string("op_5921_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_5921_pad_0 = const()[name = string("op_5921_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_5921_dilations_0 = const()[name = string("op_5921_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_5921_groups_0 = const()[name = string("op_5921_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> decoder_kv_cache_prep_2_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(427249024))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(428068288))))[name = string("decoder_kv_cache_prep_2_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1280]> decoder_kv_cache_prep_2_encoder_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("decoder_kv_cache_prep_2_encoder_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(428068416)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_5921_cast_fp16 = conv(bias = decoder_kv_cache_prep_2_encoder_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_5921_dilations_0, groups = var_5921_groups_0, pad = var_5921_pad_0, pad_type = var_5921_pad_type_0, strides = var_5921_strides_0, weight = decoder_kv_cache_prep_2_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = encoder_output_embeds)[name = string("op_5921_cast_fp16")];
+            string var_5927_pad_type_0 = const()[name = string("op_5927_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_5927_strides_0 = const()[name = string("op_5927_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_5927_pad_0 = const()[name = string("op_5927_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_5927_dilations_0 = const()[name = string("op_5927_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_5927_groups_0 = const()[name = string("op_5927_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> decoder_kv_cache_prep_2_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(428082624))), nonzero_data = tensor<fp16, [5756]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(428071040))))[name = string("decoder_kv_cache_prep_2_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_5927_cast_fp16 = conv(dilations = var_5927_dilations_0, groups = var_5927_groups_0, pad = var_5927_pad_0, pad_type = var_5927_pad_type_0, strides = var_5927_strides_0, weight = decoder_kv_cache_prep_2_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = encoder_output_embeds)[name = string("op_5927_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_5928_cast_fp16 = add(x = var_5921_cast_fp16, y = var_5927_cast_fp16)[name = string("op_5928_cast_fp16")];
+            string var_5948_pad_type_0 = const()[name = string("op_5948_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_5948_strides_0 = const()[name = string("op_5948_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_5948_pad_0 = const()[name = string("op_5948_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_5948_dilations_0 = const()[name = string("op_5948_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_5948_groups_0 = const()[name = string("op_5948_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> decoder_kv_cache_prep_3_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(428287488))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(429106752))))[name = string("decoder_kv_cache_prep_3_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_5948_cast_fp16 = conv(dilations = var_5948_dilations_0, groups = var_5948_groups_0, pad = var_5948_pad_0, pad_type = var_5948_pad_type_0, strides = var_5948_strides_0, weight = decoder_kv_cache_prep_3_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = encoder_output_embeds)[name = string("op_5948_cast_fp16")];
+            string var_5954_pad_type_0 = const()[name = string("op_5954_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_5954_strides_0 = const()[name = string("op_5954_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_5954_pad_0 = const()[name = string("op_5954_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_5954_dilations_0 = const()[name = string("op_5954_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_5954_groups_0 = const()[name = string("op_5954_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> decoder_kv_cache_prep_3_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(429129600))), nonzero_data = tensor<fp16, [11308]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(429106880))))[name = string("decoder_kv_cache_prep_3_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_5954_cast_fp16 = conv(dilations = var_5954_dilations_0, groups = var_5954_groups_0, pad = var_5954_pad_0, pad_type = var_5954_pad_type_0, strides = var_5954_strides_0, weight = decoder_kv_cache_prep_3_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = encoder_output_embeds)[name = string("op_5954_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> k_cast_fp16 = add(x = var_5948_cast_fp16, y = var_5954_cast_fp16)[name = string("k_cast_fp16")];
+            string var_5964_pad_type_0 = const()[name = string("op_5964_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_5964_strides_0 = const()[name = string("op_5964_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_5964_pad_0 = const()[name = string("op_5964_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_5964_dilations_0 = const()[name = string("op_5964_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_5964_groups_0 = const()[name = string("op_5964_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> decoder_kv_cache_prep_3_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(429334464))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(430153728))))[name = string("decoder_kv_cache_prep_3_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1280]> decoder_kv_cache_prep_3_encoder_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("decoder_kv_cache_prep_3_encoder_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(430153856)))];
+            tensor<fp16, [1, 1280, 1, 1500]> var_5964_cast_fp16 = conv(bias = decoder_kv_cache_prep_3_encoder_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_5964_dilations_0, groups = var_5964_groups_0, pad = var_5964_pad_0, pad_type = var_5964_pad_type_0, strides = var_5964_strides_0, weight = decoder_kv_cache_prep_3_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = encoder_output_embeds)[name = string("op_5964_cast_fp16")];
+            string var_5970_pad_type_0 = const()[name = string("op_5970_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_5970_strides_0 = const()[name = string("op_5970_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_5970_pad_0 = const()[name = string("op_5970_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_5970_dilations_0 = const()[name = string("op_5970_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_5970_groups_0 = const()[name = string("op_5970_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> decoder_kv_cache_prep_3_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(430170304))), nonzero_data = tensor<fp16, [6870]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(430156480))))[name = string("decoder_kv_cache_prep_3_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1500]> var_5970_cast_fp16 = conv(dilations = var_5970_dilations_0, groups = var_5970_groups_0, pad = var_5970_pad_0, pad_type = var_5970_pad_type_0, strides = var_5970_strides_0, weight = decoder_kv_cache_prep_3_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = encoder_output_embeds)[name = string("op_5970_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1500]> v_cast_fp16 = add(x = var_5964_cast_fp16, y = var_5970_cast_fp16)[name = string("v_cast_fp16")];
+            int32 var_5976 = const()[name = string("op_5976"), val = int32(0)];
+            bool input_259_interleave_0 = const()[name = string("input_259_interleave_0"), val = bool(false)];
+            tensor<fp16, [4, 1280, 1, 1500]> input_259_cast_fp16 = concat(axis = var_5976, interleave = input_259_interleave_0, values = (var_5826_cast_fp16, var_5869_cast_fp16, var_5912_cast_fp16, k_cast_fp16))[name = string("input_259_cast_fp16")];
+            int32 var_5979 = const()[name = string("op_5979"), val = int32(0)];
+            bool input_interleave_0 = const()[name = string("input_interleave_0"), val = bool(false)];
+            tensor<fp16, [4, 1280, 1, 1500]> input_cast_fp16 = concat(axis = var_5979, interleave = input_interleave_0, values = (var_5842_cast_fp16, var_5885_cast_fp16, var_5928_cast_fp16, v_cast_fp16))[name = string("input_cast_fp16")];
+            tensor<int32, [8]> var_5986_pad_0 = const()[name = string("op_5986_pad_0"), val = tensor<int32, [8]>([0, 0, 0, 0, 0, 0, 0, 36])];
+            string var_5986_mode_0 = const()[name = string("op_5986_mode_0"), val = string("constant")];
+            fp16 const_33_to_fp16 = const()[name = string("const_33_to_fp16"), val = fp16(0x0p+0)];
+            tensor<fp16, [4, 1280, 1, 1536]> encoder_attn_key_cache = pad(constant_val = const_33_to_fp16, mode = var_5986_mode_0, pad = var_5986_pad_0, x = input_259_cast_fp16)[name = string("op_5986_cast_fp16")];
+            tensor<int32, [8]> var_5992_pad_0 = const()[name = string("op_5992_pad_0"), val = tensor<int32, [8]>([0, 0, 0, 0, 0, 0, 0, 36])];
+            string var_5992_mode_0 = const()[name = string("op_5992_mode_0"), val = string("constant")];
+            fp16 const_34_to_fp16 = const()[name = string("const_34_to_fp16"), val = fp16(0x0p+0)];
+            tensor<fp16, [4, 1280, 1, 1536]> encoder_attn_value_cache = pad(constant_val = const_34_to_fp16, mode = var_5992_mode_0, pad = var_5992_pad_0, x = input_cast_fp16)[name = string("op_5992_cast_fp16")];
+        } -> (encoder_output_embeds, encoder_attn_key_cache, encoder_attn_value_cache);
+}
\ No newline at end of file
diff --git a/openai_whisper-large-v3-v20240930_626MB/AudioEncoder.mlmodelc/model.mlmodel b/openai_whisper-large-v3-v20240930_626MB/AudioEncoder.mlmodelc/model.mlmodel
new file mode 100644
index 0000000000000000000000000000000000000000..ef1d9c31d1cc6287fdfafa45ae7295acf4a10c6f
--- /dev/null
+++ b/openai_whisper-large-v3-v20240930_626MB/AudioEncoder.mlmodelc/model.mlmodel
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a9ff86aadad04b5a33905359c151be0c2bca6fca7212b6856376508716072906
+size 798066
diff --git a/openai_whisper-large-v3-v20240930_626MB/AudioEncoder.mlmodelc/weights/weight.bin b/openai_whisper-large-v3-v20240930_626MB/AudioEncoder.mlmodelc/weights/weight.bin
new file mode 100644
index 0000000000000000000000000000000000000000..7d7c2a8d14888a317e1501407796c93b56a445bc
--- /dev/null
+++ b/openai_whisper-large-v3-v20240930_626MB/AudioEncoder.mlmodelc/weights/weight.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a1513c45d5252aafd4b7a949d783c5e80786766432017a62227d131958c9a30c
+size 430375168
diff --git a/openai_whisper-large-v3-v20240930_626MB/MelSpectrogram.mlmodelc/analytics/coremldata.bin b/openai_whisper-large-v3-v20240930_626MB/MelSpectrogram.mlmodelc/analytics/coremldata.bin
new file mode 100644
index 0000000000000000000000000000000000000000..3ba3246801c85f92f79ac029f59b94e7fb646f85
--- /dev/null
+++ b/openai_whisper-large-v3-v20240930_626MB/MelSpectrogram.mlmodelc/analytics/coremldata.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0980462db89a546e1e90888ea38e0a5ddf1f1fec84608802cdbb12f8a5cc7215
+size 243
diff --git a/openai_whisper-large-v3-v20240930_626MB/MelSpectrogram.mlmodelc/coremldata.bin b/openai_whisper-large-v3-v20240930_626MB/MelSpectrogram.mlmodelc/coremldata.bin
new file mode 100644
index 0000000000000000000000000000000000000000..780171e73cd57a772ec0457470f0c8b86f4c73cd
--- /dev/null
+++ b/openai_whisper-large-v3-v20240930_626MB/MelSpectrogram.mlmodelc/coremldata.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6475c6649047ce609e3fe84b2525843c03342820662404540baf28146c174014
+size 329
diff --git a/openai_whisper-large-v3-v20240930_626MB/MelSpectrogram.mlmodelc/metadata.json b/openai_whisper-large-v3-v20240930_626MB/MelSpectrogram.mlmodelc/metadata.json
new file mode 100644
index 0000000000000000000000000000000000000000..65be90aad1d0e5f73a1f50b19705ccad3c0da822
--- /dev/null
+++ b/openai_whisper-large-v3-v20240930_626MB/MelSpectrogram.mlmodelc/metadata.json
@@ -0,0 +1,74 @@
+[
+  {
+    "metadataOutputVersion" : "3.0",
+    "storagePrecision" : "Float16",
+    "outputSchema" : [
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1 × 128 × 1 × 3000)",
+        "shortDescription" : "",
+        "shape" : "[1, 128, 1, 3000]",
+        "name" : "melspectrogram_features",
+        "type" : "MultiArray"
+      }
+    ],
+    "modelParameters" : [
+
+    ],
+    "specificationVersion" : 9,
+    "mlProgramOperationTypeHistogram" : {
+      "Ios18.mul" : 2,
+      "Ios18.square" : 2,
+      "Ios18.conv" : 2,
+      "Ios18.matmul" : 1,
+      "Ios18.expandDims" : 4,
+      "Ios18.sub" : 1,
+      "Ios18.log" : 1,
+      "Ios18.add" : 3,
+      "Ios18.sliceByIndex" : 1,
+      "Ios18.maximum" : 1,
+      "Ios18.squeeze" : 2,
+      "Ios18.reshape" : 2,
+      "Ios16.reduceMax" : 1,
+      "Identity" : 1,
+      "Pad" : 1
+    },
+    "computePrecision" : "Mixed (Float16, Float32, Int32)",
+    "isUpdatable" : "0",
+    "stateSchema" : [
+
+    ],
+    "availability" : {
+      "macOS" : "15.0",
+      "tvOS" : "18.0",
+      "visionOS" : "2.0",
+      "watchOS" : "11.0",
+      "iOS" : "18.0",
+      "macCatalyst" : "18.0"
+    },
+    "modelType" : {
+      "name" : "MLModelType_mlProgram"
+    },
+    "userDefinedMetadata" : {
+      "com.github.apple.coremltools.source_dialect" : "TorchScript",
+      "com.github.apple.coremltools.source" : "torch==2.5.1",
+      "com.github.apple.coremltools.version" : "8.0"
+    },
+    "inputSchema" : [
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 480000)",
+        "shortDescription" : "",
+        "shape" : "[480000]",
+        "name" : "audio",
+        "type" : "MultiArray"
+      }
+    ],
+    "generatedClassName" : "MelSpectrogram",
+    "method" : "predict"
+  }
+]
\ No newline at end of file
diff --git a/openai_whisper-large-v3-v20240930_626MB/MelSpectrogram.mlmodelc/model.mil b/openai_whisper-large-v3-v20240930_626MB/MelSpectrogram.mlmodelc/model.mil
new file mode 100644
index 0000000000000000000000000000000000000000..dfa5fd24b4ee5bc7445ee14fae66661e9d635a5f
--- /dev/null
+++ b/openai_whisper-large-v3-v20240930_626MB/MelSpectrogram.mlmodelc/model.mil
@@ -0,0 +1,66 @@
+program(1.3)
+[buildInfo = dict<string, string>({{"coremlc-component-MIL", "3400.43.1"}, {"coremlc-version", "3400.58.2"}, {"coremltools-component-torch", "2.5.1"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "8.0"}})]
+{
+    func main<ios18>(tensor<fp16, [480000]> audio) {
+            tensor<int32, [3]> var_10 = const()[name = string("op_10"), val = tensor<int32, [3]>([1, 1, 480000])];
+            tensor<fp16, [1, 1, 480000]> input_1_cast_fp16 = reshape(shape = var_10, x = audio)[name = string("input_1_cast_fp16")];
+            tensor<int32, [6]> input_3_pad_0 = const()[name = string("input_3_pad_0"), val = tensor<int32, [6]>([0, 0, 0, 0, 200, 200])];
+            string input_3_mode_0 = const()[name = string("input_3_mode_0"), val = string("reflect")];
+            fp16 const_1_to_fp16 = const()[name = string("const_1_to_fp16"), val = fp16(0x0p+0)];
+            tensor<fp16, [1, 1, 480400]> input_3_cast_fp16 = pad(constant_val = const_1_to_fp16, mode = input_3_mode_0, pad = input_3_pad_0, x = input_1_cast_fp16)[name = string("input_3_cast_fp16")];
+            tensor<int32, [1]> var_22 = const()[name = string("op_22"), val = tensor<int32, [1]>([480400])];
+            tensor<fp16, [480400]> input_cast_fp16 = reshape(shape = var_22, x = input_3_cast_fp16)[name = string("input_cast_fp16")];
+            tensor<int32, [1]> expand_dims_0_axes_0 = const()[name = string("expand_dims_0_axes_0"), val = tensor<int32, [1]>([0])];
+            tensor<fp16, [1, 480400]> expand_dims_0_cast_fp16 = expand_dims(axes = expand_dims_0_axes_0, x = input_cast_fp16)[name = string("expand_dims_0_cast_fp16")];
+            tensor<int32, [1]> expand_dims_3 = const()[name = string("expand_dims_3"), val = tensor<int32, [1]>([160])];
+            tensor<int32, [1]> expand_dims_4_axes_0 = const()[name = string("expand_dims_4_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 480400]> expand_dims_4_cast_fp16 = expand_dims(axes = expand_dims_4_axes_0, x = expand_dims_0_cast_fp16)[name = string("expand_dims_4_cast_fp16")];
+            string conv_0_pad_type_0 = const()[name = string("conv_0_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> conv_0_pad_0 = const()[name = string("conv_0_pad_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<int32, [1]> conv_0_dilations_0 = const()[name = string("conv_0_dilations_0"), val = tensor<int32, [1]>([1])];
+            int32 conv_0_groups_0 = const()[name = string("conv_0_groups_0"), val = int32(1)];
+            tensor<fp16, [201, 1, 400]> expand_dims_1_to_fp16 = const()[name = string("expand_dims_1_to_fp16"), val = tensor<fp16, [201, 1, 400]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64)))];
+            tensor<fp16, [1, 201, 3001]> conv_0_cast_fp16 = conv(dilations = conv_0_dilations_0, groups = conv_0_groups_0, pad = conv_0_pad_0, pad_type = conv_0_pad_type_0, strides = expand_dims_3, weight = expand_dims_1_to_fp16, x = expand_dims_4_cast_fp16)[name = string("conv_0_cast_fp16")];
+            string conv_1_pad_type_0 = const()[name = string("conv_1_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> conv_1_pad_0 = const()[name = string("conv_1_pad_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<int32, [1]> conv_1_dilations_0 = const()[name = string("conv_1_dilations_0"), val = tensor<int32, [1]>([1])];
+            int32 conv_1_groups_0 = const()[name = string("conv_1_groups_0"), val = int32(1)];
+            tensor<fp16, [201, 1, 400]> expand_dims_2_to_fp16 = const()[name = string("expand_dims_2_to_fp16"), val = tensor<fp16, [201, 1, 400]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(160960)))];
+            tensor<fp16, [1, 201, 3001]> conv_1_cast_fp16 = conv(dilations = conv_1_dilations_0, groups = conv_1_groups_0, pad = conv_1_pad_0, pad_type = conv_1_pad_type_0, strides = expand_dims_3, weight = expand_dims_2_to_fp16, x = expand_dims_4_cast_fp16)[name = string("conv_1_cast_fp16")];
+            tensor<int32, [1]> squeeze_0_axes_0 = const()[name = string("squeeze_0_axes_0"), val = tensor<int32, [1]>([0])];
+            tensor<fp16, [201, 3001]> squeeze_0_cast_fp16 = squeeze(axes = squeeze_0_axes_0, x = conv_0_cast_fp16)[name = string("squeeze_0_cast_fp16")];
+            tensor<int32, [1]> squeeze_1_axes_0 = const()[name = string("squeeze_1_axes_0"), val = tensor<int32, [1]>([0])];
+            tensor<fp16, [201, 3001]> squeeze_1_cast_fp16 = squeeze(axes = squeeze_1_axes_0, x = conv_1_cast_fp16)[name = string("squeeze_1_cast_fp16")];
+            tensor<fp16, [201, 3001]> square_0_cast_fp16 = square(x = squeeze_0_cast_fp16)[name = string("square_0_cast_fp16")];
+            tensor<fp16, [201, 3001]> square_1_cast_fp16 = square(x = squeeze_1_cast_fp16)[name = string("square_1_cast_fp16")];
+            tensor<fp16, [201, 3001]> add_1_cast_fp16 = add(x = square_0_cast_fp16, y = square_1_cast_fp16)[name = string("add_1_cast_fp16")];
+            tensor<fp16, [201, 3001]> magnitudes_1_cast_fp16 = identity(x = add_1_cast_fp16)[name = string("magnitudes_1_cast_fp16")];
+            tensor<int32, [2]> magnitudes_begin_0 = const()[name = string("magnitudes_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<int32, [2]> magnitudes_end_0 = const()[name = string("magnitudes_end_0"), val = tensor<int32, [2]>([201, 3000])];
+            tensor<bool, [2]> magnitudes_end_mask_0 = const()[name = string("magnitudes_end_mask_0"), val = tensor<bool, [2]>([true, false])];
+            tensor<fp16, [201, 3000]> magnitudes_cast_fp16 = slice_by_index(begin = magnitudes_begin_0, end = magnitudes_end_0, end_mask = magnitudes_end_mask_0, x = magnitudes_1_cast_fp16)[name = string("magnitudes_cast_fp16")];
+            bool mel_spec_1_transpose_x_0 = const()[name = string("mel_spec_1_transpose_x_0"), val = bool(false)];
+            bool mel_spec_1_transpose_y_0 = const()[name = string("mel_spec_1_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [128, 201]> mel_filters_to_fp16 = const()[name = string("mel_filters_to_fp16"), val = tensor<fp16, [128, 201]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(321856)))];
+            tensor<fp16, [128, 3000]> mel_spec_1_cast_fp16 = matmul(transpose_x = mel_spec_1_transpose_x_0, transpose_y = mel_spec_1_transpose_y_0, x = mel_filters_to_fp16, y = magnitudes_cast_fp16)[name = string("mel_spec_1_cast_fp16")];
+            fp16 var_41_to_fp16 = const()[name = string("op_41_to_fp16"), val = fp16(0x1p-24)];
+            tensor<fp16, [128, 3000]> mel_spec_cast_fp16 = add(x = mel_spec_1_cast_fp16, y = var_41_to_fp16)[name = string("mel_spec_cast_fp16")];
+            fp32 log_0_epsilon_0 = const()[name = string("log_0_epsilon_0"), val = fp32(0x1p-149)];
+            tensor<fp16, [128, 3000]> log_0_cast_fp16 = log(epsilon = log_0_epsilon_0, x = mel_spec_cast_fp16)[name = string("log_0_cast_fp16")];
+            fp16 mul_0_y_0_to_fp16 = const()[name = string("mul_0_y_0_to_fp16"), val = fp16(0x1.bccp-2)];
+            tensor<fp16, [128, 3000]> mul_0_cast_fp16 = mul(x = log_0_cast_fp16, y = mul_0_y_0_to_fp16)[name = string("mul_0_cast_fp16")];
+            bool var_44_keep_dims_0 = const()[name = string("op_44_keep_dims_0"), val = bool(false)];
+            fp16 var_44_cast_fp16 = reduce_max(keep_dims = var_44_keep_dims_0, x = mul_0_cast_fp16)[name = string("op_44_cast_fp16")];
+            fp16 var_46_to_fp16 = const()[name = string("op_46_to_fp16"), val = fp16(0x1p+3)];
+            fp16 var_47_cast_fp16 = sub(x = var_44_cast_fp16, y = var_46_to_fp16)[name = string("op_47_cast_fp16")];
+            tensor<fp16, [128, 3000]> log_spec_3_cast_fp16 = maximum(x = mul_0_cast_fp16, y = var_47_cast_fp16)[name = string("log_spec_3_cast_fp16")];
+            fp16 var_50_to_fp16 = const()[name = string("op_50_to_fp16"), val = fp16(0x1p+2)];
+            tensor<fp16, [128, 3000]> var_51_cast_fp16 = add(x = log_spec_3_cast_fp16, y = var_50_to_fp16)[name = string("op_51_cast_fp16")];
+            fp16 _inversed_log_spec_y_0_to_fp16 = const()[name = string("_inversed_log_spec_y_0_to_fp16"), val = fp16(0x1p-2)];
+            tensor<fp16, [128, 3000]> _inversed_log_spec_cast_fp16 = mul(x = var_51_cast_fp16, y = _inversed_log_spec_y_0_to_fp16)[name = string("_inversed_log_spec_cast_fp16")];
+            tensor<int32, [1]> var_55_axes_0 = const()[name = string("op_55_axes_0"), val = tensor<int32, [1]>([0])];
+            tensor<fp16, [1, 128, 3000]> var_55_cast_fp16 = expand_dims(axes = var_55_axes_0, x = _inversed_log_spec_cast_fp16)[name = string("op_55_cast_fp16")];
+            tensor<int32, [1]> var_62_axes_0 = const()[name = string("op_62_axes_0"), val = tensor<int32, [1]>([2])];
+            tensor<fp16, [1, 128, 1, 3000]> melspectrogram_features = expand_dims(axes = var_62_axes_0, x = var_55_cast_fp16)[name = string("op_62_cast_fp16")];
+        } -> (melspectrogram_features);
+}
\ No newline at end of file
diff --git a/openai_whisper-large-v3-v20240930_626MB/MelSpectrogram.mlmodelc/weights/weight.bin b/openai_whisper-large-v3-v20240930_626MB/MelSpectrogram.mlmodelc/weights/weight.bin
new file mode 100644
index 0000000000000000000000000000000000000000..2ae170c9000db89326cc2600450001654bb10f7f
--- /dev/null
+++ b/openai_whisper-large-v3-v20240930_626MB/MelSpectrogram.mlmodelc/weights/weight.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:009d9fb8f6b589accfa08cebf1c712ef07c3405229ce3cfb3a57ee033c9d8a49
+size 373376
diff --git a/openai_whisper-large-v3-v20240930_626MB/TextDecoder.mlmodelc/analytics/coremldata.bin b/openai_whisper-large-v3-v20240930_626MB/TextDecoder.mlmodelc/analytics/coremldata.bin
new file mode 100644
index 0000000000000000000000000000000000000000..359bc34d390c2efddcb2331982005a91d383051e
--- /dev/null
+++ b/openai_whisper-large-v3-v20240930_626MB/TextDecoder.mlmodelc/analytics/coremldata.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e77577edea445aa84e0b74bedc71812d8321b3861d62fd1ceee924f6f920f20a
+size 243
diff --git a/openai_whisper-large-v3-v20240930_626MB/TextDecoder.mlmodelc/coremldata.bin b/openai_whisper-large-v3-v20240930_626MB/TextDecoder.mlmodelc/coremldata.bin
new file mode 100644
index 0000000000000000000000000000000000000000..89928605833c60a71a2f6f55f19c9a96d133f403
--- /dev/null
+++ b/openai_whisper-large-v3-v20240930_626MB/TextDecoder.mlmodelc/coremldata.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c4901c5e4249e42e8f37325412eb6fcf9ca9c5e22660271613675afed77cff8f
+size 754
diff --git a/openai_whisper-large-v3-v20240930_626MB/TextDecoder.mlmodelc/metadata.json b/openai_whisper-large-v3-v20240930_626MB/TextDecoder.mlmodelc/metadata.json
new file mode 100644
index 0000000000000000000000000000000000000000..2d0bf78ea44099fa71eaa8c07d90f2a84c5297a0
--- /dev/null
+++ b/openai_whisper-large-v3-v20240930_626MB/TextDecoder.mlmodelc/metadata.json
@@ -0,0 +1,185 @@
+[
+  {
+    "metadataOutputVersion" : "3.0",
+    "storagePrecision" : "Mixed (Float16, Palettized (4 bits), Sparse)",
+    "outputSchema" : [
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1 × 1 × 51866)",
+        "shortDescription" : "",
+        "shape" : "[1, 1, 51866]",
+        "name" : "logits",
+        "type" : "MultiArray"
+      },
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1 × 5120 × 1 × 1)",
+        "shortDescription" : "",
+        "shape" : "[1, 5120, 1, 1]",
+        "name" : "key_cache_updates",
+        "type" : "MultiArray"
+      },
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1 × 5120 × 1 × 1)",
+        "shortDescription" : "",
+        "shape" : "[1, 5120, 1, 1]",
+        "name" : "value_cache_updates",
+        "type" : "MultiArray"
+      },
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1 × 1536)",
+        "shortDescription" : "",
+        "shape" : "[1, 1536]",
+        "name" : "alignment_heads_weights",
+        "type" : "MultiArray"
+      }
+    ],
+    "modelParameters" : [
+
+    ],
+    "specificationVersion" : 9,
+    "mlProgramOperationTypeHistogram" : {
+      "Ios18.expandDims" : 8,
+      "Ios18.softmax" : 8,
+      "Ios18.mul" : 16,
+      "Ios18.matmul" : 16,
+      "Ios18.batchNorm" : 13,
+      "Ios16.reduceMean" : 1,
+      "Split" : 2,
+      "Ios18.readState" : 5,
+      "Ios18.gather" : 3,
+      "Ios18.add" : 62,
+      "Ios18.layerNorm" : 13,
+      "Ios18.reshape" : 32,
+      "Ios18.constexprLutToDense" : 32,
+      "Ios18.constexprSparseToDense" : 33,
+      "Ios18.conv" : 64,
+      "Ios18.gelu" : 4,
+      "Ios18.linear" : 1,
+      "Ios18.cast" : 1,
+      "Ios18.transpose" : 1,
+      "Ios18.concat" : 3,
+      "Ios18.sliceByIndex" : 20,
+      "Ios18.squeeze" : 1
+    },
+    "computePrecision" : "Mixed (Float16, Int32, UInt16)",
+    "isUpdatable" : "0",
+    "stateSchema" : [
+      {
+        "dataType" : "Float16",
+        "isOptional" : "0",
+        "formattedType" : "State (Float16 1 × 1536)",
+        "shortDescription" : "",
+        "shape" : "[1, 1536]",
+        "name" : "encoder_attn_key_padding_mask",
+        "type" : "State"
+      },
+      {
+        "dataType" : "Float16",
+        "isOptional" : "0",
+        "formattedType" : "State (Float16 4 × 1280 × 1 × 1536)",
+        "shortDescription" : "",
+        "shape" : "[4, 1280, 1, 1536]",
+        "name" : "encoder_attn_key_cache",
+        "type" : "State"
+      },
+      {
+        "dataType" : "Float16",
+        "isOptional" : "0",
+        "formattedType" : "State (Float16 4 × 1280 × 1 × 1536)",
+        "shortDescription" : "",
+        "shape" : "[4, 1280, 1, 1536]",
+        "name" : "encoder_attn_value_cache",
+        "type" : "State"
+      },
+      {
+        "dataType" : "Float16",
+        "isOptional" : "0",
+        "formattedType" : "State (Float16 4 × 1280 × 1 × 448)",
+        "shortDescription" : "",
+        "shape" : "[4, 1280, 1, 448]",
+        "name" : "self_attn_key_cache",
+        "type" : "State"
+      },
+      {
+        "dataType" : "Float16",
+        "isOptional" : "0",
+        "formattedType" : "State (Float16 4 × 1280 × 1 × 448)",
+        "shortDescription" : "",
+        "shape" : "[4, 1280, 1, 448]",
+        "name" : "self_attn_value_cache",
+        "type" : "State"
+      }
+    ],
+    "availability" : {
+      "macOS" : "15.0",
+      "tvOS" : "18.0",
+      "visionOS" : "2.0",
+      "watchOS" : "11.0",
+      "iOS" : "18.0",
+      "macCatalyst" : "18.0"
+    },
+    "modelType" : {
+      "name" : "MLModelType_mlProgram"
+    },
+    "userDefinedMetadata" : {
+      "com.github.apple.coremltools.source_dialect" : "TorchScript",
+      "com.github.apple.coremltools.source" : "torch==2.5.1",
+      "com.github.apple.coremltools.version" : "8.0"
+    },
+    "inputSchema" : [
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Int32",
+        "formattedType" : "MultiArray (Int32 1)",
+        "shortDescription" : "",
+        "shape" : "[1]",
+        "name" : "input_ids",
+        "type" : "MultiArray"
+      },
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Int32",
+        "formattedType" : "MultiArray (Int32 1)",
+        "shortDescription" : "",
+        "shape" : "[1]",
+        "name" : "cache_length",
+        "type" : "MultiArray"
+      },
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1 × 448)",
+        "shortDescription" : "",
+        "shape" : "[1, 448]",
+        "name" : "kv_cache_update_mask",
+        "type" : "MultiArray"
+      },
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1 × 448)",
+        "shortDescription" : "",
+        "shape" : "[1, 448]",
+        "name" : "decoder_key_padding_mask",
+        "type" : "MultiArray"
+      }
+    ],
+    "generatedClassName" : "TextDecoderStateful",
+    "method" : "predict"
+  }
+]
\ No newline at end of file
diff --git a/openai_whisper-large-v3-v20240930_626MB/TextDecoder.mlmodelc/model.mil b/openai_whisper-large-v3-v20240930_626MB/TextDecoder.mlmodelc/model.mil
new file mode 100644
index 0000000000000000000000000000000000000000..15d32eefa4d9183c5ba88cba6bc4fc9a239cc434
--- /dev/null
+++ b/openai_whisper-large-v3-v20240930_626MB/TextDecoder.mlmodelc/model.mil
@@ -0,0 +1,941 @@
+program(1.3)
+[buildInfo = dict<string, string>({{"coremlc-component-MIL", "3400.43.1"}, {"coremlc-version", "3400.58.2"}})]
+{
+    func main<ios18>(tensor<int32, [1]> cache_length, tensor<fp16, [1, 448]> decoder_key_padding_mask, state<tensor<fp16, [4, 1280, 1, 1536]>> encoder_attn_key_cache, state<tensor<fp16, [1, 1536]>> encoder_attn_key_padding_mask, state<tensor<fp16, [4, 1280, 1, 1536]>> encoder_attn_value_cache, tensor<int32, [1]> input_ids, tensor<fp16, [1, 448]> kv_cache_update_mask, state<tensor<fp16, [4, 1280, 1, 448]>> self_attn_key_cache, state<tensor<fp16, [4, 1280, 1, 448]>> self_attn_value_cache) {
+            int32 var_26_axis_0 = const()[name = string("op_26_axis_0"), val = int32(0)];
+            int32 var_26_batch_dims_0 = const()[name = string("op_26_batch_dims_0"), val = int32(0)];
+            bool var_26_validate_indices_0 = const()[name = string("op_26_validate_indices_0"), val = bool(false)];
+            tensor<fp16, [51866, 1280]> embed_tokens_weight_to_fp16 = const()[name = string("embed_tokens_weight_to_fp16"), val = tensor<fp16, [51866, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64)))];
+            tensor<fp16, [1, 1280]> var_26_cast_fp16 = gather(axis = var_26_axis_0, batch_dims = var_26_batch_dims_0, indices = input_ids, validate_indices = var_26_validate_indices_0, x = embed_tokens_weight_to_fp16)[name = string("op_26_cast_fp16")];
+            int32 var_33_axis_0 = const()[name = string("op_33_axis_0"), val = int32(0)];
+            int32 var_33_batch_dims_0 = const()[name = string("op_33_batch_dims_0"), val = int32(0)];
+            bool var_33_validate_indices_0 = const()[name = string("op_33_validate_indices_0"), val = bool(false)];
+            tensor<fp16, [448, 1280]> embed_positions_inlier_module_weight_to_fp16 = const()[name = string("embed_positions_inlier_module_weight_to_fp16"), val = tensor<fp16, [448, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(132777088)))];
+            string cache_length_to_uint16_dtype_0 = const()[name = string("cache_length_to_uint16_dtype_0"), val = string("uint16")];
+            tensor<uint16, [1]> cache_length_to_uint16 = cast(dtype = cache_length_to_uint16_dtype_0, x = cache_length)[name = string("cast_0")];
+            tensor<fp16, [1, 1280]> var_33_cast_fp16_cast_uint16 = gather(axis = var_33_axis_0, batch_dims = var_33_batch_dims_0, indices = cache_length_to_uint16, validate_indices = var_33_validate_indices_0, x = embed_positions_inlier_module_weight_to_fp16)[name = string("op_33_cast_fp16_cast_uint16")];
+            int32 var_35_axis_0 = const()[name = string("op_35_axis_0"), val = int32(0)];
+            int32 var_35_batch_dims_0 = const()[name = string("op_35_batch_dims_0"), val = int32(0)];
+            bool var_35_validate_indices_0 = const()[name = string("op_35_validate_indices_0"), val = bool(false)];
+            tensor<fp16, [448, 1280]> embed_positions_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [448, 1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(133941312))), nonzero_data = tensor<fp16, [8582]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(133924032))))[name = string("embed_positions_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280]> var_35_cast_fp16_cast_uint16 = gather(axis = var_35_axis_0, batch_dims = var_35_batch_dims_0, indices = cache_length_to_uint16, validate_indices = var_35_validate_indices_0, x = embed_positions_outlier_module_weight_to_fp16_sparsified)[name = string("op_35_cast_fp16_cast_uint16")];
+            tensor<fp16, [1, 1280]> var_36_cast_fp16 = add(x = var_33_cast_fp16_cast_uint16, y = var_35_cast_fp16_cast_uint16)[name = string("op_36_cast_fp16")];
+            tensor<fp16, [1, 1280]> hidden_states_1_cast_fp16 = add(x = var_26_cast_fp16, y = var_36_cast_fp16)[name = string("hidden_states_1_cast_fp16")];
+            tensor<int32, [1]> var_50_axes_0 = const()[name = string("op_50_axes_0"), val = tensor<int32, [1]>([2])];
+            tensor<fp16, [1, 1280, 1]> var_50_cast_fp16 = expand_dims(axes = var_50_axes_0, x = hidden_states_1_cast_fp16)[name = string("op_50_cast_fp16")];
+            tensor<int32, [1]> inputs_1_axes_0 = const()[name = string("inputs_1_axes_0"), val = tensor<int32, [1]>([3])];
+            tensor<fp16, [1, 1280, 1, 1]> inputs_1_cast_fp16 = expand_dims(axes = inputs_1_axes_0, x = var_50_cast_fp16)[name = string("inputs_1_cast_fp16")];
+            tensor<fp16, [4, 1280, 1, 448]> read_state_0 = read_state(input = self_attn_key_cache)[name = string("read_state_0")];
+            tensor<int32, [4]> tile_0 = const()[name = string("tile_0"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            int32 var_55_axis_0 = const()[name = string("op_55_axis_0"), val = int32(0)];
+            tensor<fp16, [1, 1280, 1, 448]> var_55_cast_fp16_0, tensor<fp16, [1, 1280, 1, 448]> var_55_cast_fp16_1, tensor<fp16, [1, 1280, 1, 448]> var_55_cast_fp16_2, tensor<fp16, [1, 1280, 1, 448]> var_55_cast_fp16_3 = split(axis = var_55_axis_0, split_sizes = tile_0, x = read_state_0)[name = string("op_55_cast_fp16")];
+            tensor<fp16, [4, 1280, 1, 448]> read_state_1 = read_state(input = self_attn_value_cache)[name = string("read_state_1")];
+            tensor<int32, [4]> tile_1 = const()[name = string("tile_1"), val = tensor<int32, [4]>([1, 1, 1, 1])];
+            int32 var_62_axis_0 = const()[name = string("op_62_axis_0"), val = int32(0)];
+            tensor<fp16, [1, 1280, 1, 448]> var_62_cast_fp16_0, tensor<fp16, [1, 1280, 1, 448]> var_62_cast_fp16_1, tensor<fp16, [1, 1280, 1, 448]> var_62_cast_fp16_2, tensor<fp16, [1, 1280, 1, 448]> var_62_cast_fp16_3 = split(axis = var_62_axis_0, split_sizes = tile_1, x = read_state_1)[name = string("op_62_cast_fp16")];
+            tensor<fp16, [4, 1280, 1, 1536]> read_state_2 = read_state(input = encoder_attn_key_cache)[name = string("read_state_2")];
+            tensor<int32, [4]> obj_17_begin_0 = const()[name = string("obj_17_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> obj_17_end_0 = const()[name = string("obj_17_end_0"), val = tensor<int32, [4]>([1, 1280, 1, 1536])];
+            tensor<bool, [4]> obj_17_end_mask_0 = const()[name = string("obj_17_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<fp16, [1, 1280, 1, 1536]> obj_17_cast_fp16 = slice_by_index(begin = obj_17_begin_0, end = obj_17_end_0, end_mask = obj_17_end_mask_0, x = read_state_2)[name = string("obj_17_cast_fp16")];
+            tensor<fp16, [4, 1280, 1, 1536]> read_state_3 = read_state(input = encoder_attn_value_cache)[name = string("read_state_3")];
+            tensor<int32, [4]> obj_19_begin_0 = const()[name = string("obj_19_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> obj_19_end_0 = const()[name = string("obj_19_end_0"), val = tensor<int32, [4]>([1, 1280, 1, 1536])];
+            tensor<bool, [4]> obj_19_end_mask_0 = const()[name = string("obj_19_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<fp16, [1, 1280, 1, 1536]> obj_19_cast_fp16 = slice_by_index(begin = obj_19_begin_0, end = obj_19_end_0, end_mask = obj_19_end_mask_0, x = read_state_3)[name = string("obj_19_cast_fp16")];
+            int32 var_82 = const()[name = string("op_82"), val = int32(3)];
+            tensor<int32, [1]> out_1_axes_0 = const()[name = string("out_1_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_107_to_fp16 = const()[name = string("op_107_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1]> out_1_cast_fp16 = layer_norm(axes = out_1_axes_0, epsilon = var_107_to_fp16, x = inputs_1_cast_fp16)[name = string("out_1_cast_fp16")];
+            tensor<fp16, [1280]> obj_5_mean_0_to_fp16 = const()[name = string("obj_5_mean_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(134013056)))];
+            tensor<fp16, [1280]> obj_5_variance_0_to_fp16 = const()[name = string("obj_5_variance_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(134015680)))];
+            tensor<fp16, [1280]> obj_5_gamma_0_to_fp16 = const()[name = string("obj_5_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(134018304)))];
+            tensor<fp16, [1280]> obj_5_beta_0_to_fp16 = const()[name = string("obj_5_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(134020928)))];
+            fp16 obj_5_epsilon_0_to_fp16 = const()[name = string("obj_5_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1]> obj_5_cast_fp16 = batch_norm(beta = obj_5_beta_0_to_fp16, epsilon = obj_5_epsilon_0_to_fp16, gamma = obj_5_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_1_cast_fp16)[name = string("obj_5_cast_fp16")];
+            string var_129_pad_type_0 = const()[name = string("op_129_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_129_strides_0 = const()[name = string("op_129_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_129_pad_0 = const()[name = string("op_129_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_129_dilations_0 = const()[name = string("op_129_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_129_groups_0 = const()[name = string("op_129_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_0_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(134023552))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(134842816))))[name = string("layers_0_self_attn_q_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1280]> layers_0_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_0_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(134842944)))];
+            tensor<fp16, [1, 1280, 1, 1]> var_129_cast_fp16 = conv(bias = layers_0_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_129_dilations_0, groups = var_129_groups_0, pad = var_129_pad_0, pad_type = var_129_pad_type_0, strides = var_129_strides_0, weight = layers_0_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_5_cast_fp16)[name = string("op_129_cast_fp16")];
+            string var_135_pad_type_0 = const()[name = string("op_135_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_135_strides_0 = const()[name = string("op_135_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_135_pad_0 = const()[name = string("op_135_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_135_dilations_0 = const()[name = string("op_135_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_135_groups_0 = const()[name = string("op_135_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_0_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(134918592))), nonzero_data = tensor<fp16, [36461]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(134845568))))[name = string("layers_0_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1]> var_135_cast_fp16 = conv(dilations = var_135_dilations_0, groups = var_135_groups_0, pad = var_135_pad_0, pad_type = var_135_pad_type_0, strides = var_135_strides_0, weight = layers_0_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_5_cast_fp16)[name = string("op_135_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1]> query_1_cast_fp16 = add(x = var_129_cast_fp16, y = var_135_cast_fp16)[name = string("query_1_cast_fp16")];
+            string var_144_pad_type_0 = const()[name = string("op_144_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_144_strides_0 = const()[name = string("op_144_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_144_pad_0 = const()[name = string("op_144_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_144_dilations_0 = const()[name = string("op_144_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_144_groups_0 = const()[name = string("op_144_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_0_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(135123456))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(135942720))))[name = string("layers_0_self_attn_k_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1, 1280, 1, 1]> var_144_cast_fp16 = conv(dilations = var_144_dilations_0, groups = var_144_groups_0, pad = var_144_pad_0, pad_type = var_144_pad_type_0, strides = var_144_strides_0, weight = layers_0_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_5_cast_fp16)[name = string("op_144_cast_fp16")];
+            string var_150_pad_type_0 = const()[name = string("op_150_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_150_strides_0 = const()[name = string("op_150_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_150_pad_0 = const()[name = string("op_150_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_150_dilations_0 = const()[name = string("op_150_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_150_groups_0 = const()[name = string("op_150_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_0_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(135976320))), nonzero_data = tensor<fp16, [16673]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(135942848))))[name = string("layers_0_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1]> var_150_cast_fp16 = conv(dilations = var_150_dilations_0, groups = var_150_groups_0, pad = var_150_pad_0, pad_type = var_150_pad_type_0, strides = var_150_strides_0, weight = layers_0_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_5_cast_fp16)[name = string("op_150_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1]> current_key_1_cast_fp16 = add(x = var_144_cast_fp16, y = var_150_cast_fp16)[name = string("current_key_1_cast_fp16")];
+            string var_160_pad_type_0 = const()[name = string("op_160_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_160_strides_0 = const()[name = string("op_160_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_160_pad_0 = const()[name = string("op_160_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_160_dilations_0 = const()[name = string("op_160_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_160_groups_0 = const()[name = string("op_160_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_0_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(136181184))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(137000448))))[name = string("layers_0_self_attn_v_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1280]> layers_0_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_0_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(137000576)))];
+            tensor<fp16, [1, 1280, 1, 1]> var_160_cast_fp16 = conv(bias = layers_0_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_160_dilations_0, groups = var_160_groups_0, pad = var_160_pad_0, pad_type = var_160_pad_type_0, strides = var_160_strides_0, weight = layers_0_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_5_cast_fp16)[name = string("op_160_cast_fp16")];
+            string var_166_pad_type_0 = const()[name = string("op_166_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_166_strides_0 = const()[name = string("op_166_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_166_pad_0 = const()[name = string("op_166_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_166_dilations_0 = const()[name = string("op_166_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_166_groups_0 = const()[name = string("op_166_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_0_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(137046720))), nonzero_data = tensor<fp16, [21721]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(137003200))))[name = string("layers_0_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1]> var_166_cast_fp16 = conv(dilations = var_166_dilations_0, groups = var_166_groups_0, pad = var_166_pad_0, pad_type = var_166_pad_type_0, strides = var_166_strides_0, weight = layers_0_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_5_cast_fp16)[name = string("op_166_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1]> current_value_1_cast_fp16 = add(x = var_160_cast_fp16, y = var_166_cast_fp16)[name = string("current_value_1_cast_fp16")];
+            tensor<int32, [1]> var_169_axes_0 = const()[name = string("op_169_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 448]> var_169_cast_fp16 = expand_dims(axes = var_169_axes_0, x = kv_cache_update_mask)[name = string("op_169_cast_fp16")];
+            tensor<int32, [1]> var_170_axes_0 = const()[name = string("op_170_axes_0"), val = tensor<int32, [1]>([2])];
+            tensor<fp16, [1, 1, 1, 448]> var_170_cast_fp16 = expand_dims(axes = var_170_axes_0, x = var_169_cast_fp16)[name = string("op_170_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 448]> var_172_cast_fp16 = mul(x = current_key_1_cast_fp16, y = var_170_cast_fp16)[name = string("op_172_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 448]> key_1_cast_fp16 = add(x = var_55_cast_fp16_0, y = var_172_cast_fp16)[name = string("key_1_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 448]> var_174_cast_fp16 = mul(x = current_value_1_cast_fp16, y = var_170_cast_fp16)[name = string("op_174_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 448]> value_1_cast_fp16 = add(x = var_62_cast_fp16_0, y = var_174_cast_fp16)[name = string("value_1_cast_fp16")];
+            tensor<int32, [4]> var_177 = const()[name = string("op_177"), val = tensor<int32, [4]>([1, 20, 64, -1])];
+            tensor<fp16, [1, 20, 64, 1]> mh_q_1_cast_fp16 = reshape(shape = var_177, x = query_1_cast_fp16)[name = string("mh_q_1_cast_fp16")];
+            fp16 var_179_to_fp16 = const()[name = string("op_179_to_fp16"), val = fp16(0x1p-3)];
+            tensor<fp16, [1, 20, 64, 1]> var_180_cast_fp16 = mul(x = mh_q_1_cast_fp16, y = var_179_to_fp16)[name = string("op_180_cast_fp16")];
+            tensor<int32, [4]> var_181 = const()[name = string("op_181"), val = tensor<int32, [4]>([1, 20, 64, -1])];
+            tensor<fp16, [1, 20, 64, 448]> var_182_cast_fp16 = reshape(shape = var_181, x = key_1_cast_fp16)[name = string("op_182_cast_fp16")];
+            bool mh_w_1_transpose_x_0 = const()[name = string("mh_w_1_transpose_x_0"), val = bool(true)];
+            bool mh_w_1_transpose_y_0 = const()[name = string("mh_w_1_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, 1, 448]> mh_w_1_cast_fp16 = matmul(transpose_x = mh_w_1_transpose_x_0, transpose_y = mh_w_1_transpose_y_0, x = var_180_cast_fp16, y = var_182_cast_fp16)[name = string("mh_w_1_cast_fp16")];
+            tensor<int32, [1]> var_186_axes_0 = const()[name = string("op_186_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 448]> var_186_cast_fp16 = expand_dims(axes = var_186_axes_0, x = decoder_key_padding_mask)[name = string("op_186_cast_fp16")];
+            tensor<int32, [1]> var_187_axes_0 = const()[name = string("op_187_axes_0"), val = tensor<int32, [1]>([2])];
+            tensor<fp16, [1, 1, 1, 448]> var_187_cast_fp16 = expand_dims(axes = var_187_axes_0, x = var_186_cast_fp16)[name = string("op_187_cast_fp16")];
+            tensor<fp16, [1, 20, 1, 448]> mh_w_3_cast_fp16 = add(x = mh_w_1_cast_fp16, y = var_187_cast_fp16)[name = string("mh_w_3_cast_fp16")];
+            tensor<fp16, [1, 20, 1, 448]> var_190_cast_fp16 = softmax(axis = var_82, x = mh_w_3_cast_fp16)[name = string("op_190_cast_fp16")];
+            tensor<int32, [4]> var_191 = const()[name = string("op_191"), val = tensor<int32, [4]>([1, 20, 64, -1])];
+            tensor<fp16, [1, 20, 64, 448]> var_192_cast_fp16 = reshape(shape = var_191, x = value_1_cast_fp16)[name = string("op_192_cast_fp16")];
+            bool attn_1_transpose_x_0 = const()[name = string("attn_1_transpose_x_0"), val = bool(false)];
+            bool attn_1_transpose_y_0 = const()[name = string("attn_1_transpose_y_0"), val = bool(true)];
+            tensor<fp16, [1, 20, 64, 1]> attn_1_cast_fp16 = matmul(transpose_x = attn_1_transpose_x_0, transpose_y = attn_1_transpose_y_0, x = var_192_cast_fp16, y = var_190_cast_fp16)[name = string("attn_1_cast_fp16")];
+            tensor<int32, [4]> var_195 = const()[name = string("op_195"), val = tensor<int32, [4]>([1, 1280, 1, -1])];
+            tensor<fp16, [1, 1280, 1, 1]> input_1_cast_fp16 = reshape(shape = var_195, x = attn_1_cast_fp16)[name = string("input_1_cast_fp16")];
+            string var_205_pad_type_0 = const()[name = string("op_205_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_205_strides_0 = const()[name = string("op_205_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_205_pad_0 = const()[name = string("op_205_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_205_dilations_0 = const()[name = string("op_205_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_205_groups_0 = const()[name = string("op_205_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_0_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(137251584))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(138070848))))[name = string("layers_0_self_attn_o_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1280]> layers_0_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_0_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(138070976)))];
+            tensor<fp16, [1, 1280, 1, 1]> var_205_cast_fp16 = conv(bias = layers_0_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_205_dilations_0, groups = var_205_groups_0, pad = var_205_pad_0, pad_type = var_205_pad_type_0, strides = var_205_strides_0, weight = layers_0_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_1_cast_fp16)[name = string("op_205_cast_fp16")];
+            string var_211_pad_type_0 = const()[name = string("op_211_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_211_strides_0 = const()[name = string("op_211_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_211_pad_0 = const()[name = string("op_211_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_211_dilations_0 = const()[name = string("op_211_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_211_groups_0 = const()[name = string("op_211_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_0_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(138130624))), nonzero_data = tensor<fp16, [28455]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(138073600))))[name = string("layers_0_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1]> var_211_cast_fp16 = conv(dilations = var_211_dilations_0, groups = var_211_groups_0, pad = var_211_pad_0, pad_type = var_211_pad_type_0, strides = var_211_strides_0, weight = layers_0_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_1_cast_fp16)[name = string("op_211_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1]> obj_11_cast_fp16 = add(x = var_205_cast_fp16, y = var_211_cast_fp16)[name = string("obj_11_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1]> inputs_3_cast_fp16 = add(x = inputs_1_cast_fp16, y = obj_11_cast_fp16)[name = string("inputs_3_cast_fp16")];
+            tensor<int32, [1]> out_3_axes_0 = const()[name = string("out_3_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_226_to_fp16 = const()[name = string("op_226_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1]> out_3_cast_fp16 = layer_norm(axes = out_3_axes_0, epsilon = var_226_to_fp16, x = inputs_3_cast_fp16)[name = string("out_3_cast_fp16")];
+            tensor<fp16, [1280]> obj_13_gamma_0_to_fp16 = const()[name = string("obj_13_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(138335488)))];
+            tensor<fp16, [1280]> obj_13_beta_0_to_fp16 = const()[name = string("obj_13_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(138338112)))];
+            fp16 obj_13_epsilon_0_to_fp16 = const()[name = string("obj_13_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1]> obj_13_cast_fp16 = batch_norm(beta = obj_13_beta_0_to_fp16, epsilon = obj_13_epsilon_0_to_fp16, gamma = obj_13_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_3_cast_fp16)[name = string("obj_13_cast_fp16")];
+            string var_246_pad_type_0 = const()[name = string("op_246_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_246_strides_0 = const()[name = string("op_246_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_246_pad_0 = const()[name = string("op_246_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_246_dilations_0 = const()[name = string("op_246_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_246_groups_0 = const()[name = string("op_246_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_0_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(138340736))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(139160000))))[name = string("layers_0_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1280]> layers_0_encoder_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_0_encoder_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(139160128)))];
+            tensor<fp16, [1, 1280, 1, 1]> var_246_cast_fp16 = conv(bias = layers_0_encoder_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_246_dilations_0, groups = var_246_groups_0, pad = var_246_pad_0, pad_type = var_246_pad_type_0, strides = var_246_strides_0, weight = layers_0_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_13_cast_fp16)[name = string("op_246_cast_fp16")];
+            string var_252_pad_type_0 = const()[name = string("op_252_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_252_strides_0 = const()[name = string("op_252_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_252_pad_0 = const()[name = string("op_252_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_252_dilations_0 = const()[name = string("op_252_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_252_groups_0 = const()[name = string("op_252_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_0_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(139188224))), nonzero_data = tensor<fp16, [12701]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(139162752))))[name = string("layers_0_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1]> var_252_cast_fp16 = conv(dilations = var_252_dilations_0, groups = var_252_groups_0, pad = var_252_pad_0, pad_type = var_252_pad_type_0, strides = var_252_strides_0, weight = layers_0_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_13_cast_fp16)[name = string("op_252_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1]> query_3_cast_fp16 = add(x = var_246_cast_fp16, y = var_252_cast_fp16)[name = string("query_3_cast_fp16")];
+            tensor<int32, [4]> var_255 = const()[name = string("op_255"), val = tensor<int32, [4]>([1, 20, 64, -1])];
+            tensor<fp16, [1, 20, 64, 1]> mh_q_3_cast_fp16 = reshape(shape = var_255, x = query_3_cast_fp16)[name = string("mh_q_3_cast_fp16")];
+            fp16 var_257_to_fp16 = const()[name = string("op_257_to_fp16"), val = fp16(0x1p-3)];
+            tensor<fp16, [1, 20, 64, 1]> var_258_cast_fp16 = mul(x = mh_q_3_cast_fp16, y = var_257_to_fp16)[name = string("op_258_cast_fp16")];
+            tensor<int32, [4]> var_259 = const()[name = string("op_259"), val = tensor<int32, [4]>([1, 20, 64, -1])];
+            tensor<fp16, [1, 20, 64, 1536]> var_260_cast_fp16 = reshape(shape = var_259, x = obj_17_cast_fp16)[name = string("op_260_cast_fp16")];
+            bool mh_w_5_transpose_x_0 = const()[name = string("mh_w_5_transpose_x_0"), val = bool(true)];
+            bool mh_w_5_transpose_y_0 = const()[name = string("mh_w_5_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, 1, 1536]> mh_w_5_cast_fp16 = matmul(transpose_x = mh_w_5_transpose_x_0, transpose_y = mh_w_5_transpose_y_0, x = var_258_cast_fp16, y = var_260_cast_fp16)[name = string("mh_w_5_cast_fp16")];
+            tensor<fp16, [1, 1536]> read_state_4 = read_state(input = encoder_attn_key_padding_mask)[name = string("read_state_4")];
+            tensor<int32, [1]> var_264_axes_0 = const()[name = string("op_264_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1536]> var_264_cast_fp16 = expand_dims(axes = var_264_axes_0, x = read_state_4)[name = string("op_264_cast_fp16")];
+            tensor<int32, [1]> var_265_axes_0 = const()[name = string("op_265_axes_0"), val = tensor<int32, [1]>([2])];
+            tensor<fp16, [1, 1, 1, 1536]> var_265_cast_fp16 = expand_dims(axes = var_265_axes_0, x = var_264_cast_fp16)[name = string("op_265_cast_fp16")];
+            tensor<fp16, [1, 20, 1, 1536]> mh_w_7_cast_fp16 = add(x = mh_w_5_cast_fp16, y = var_265_cast_fp16)[name = string("mh_w_7_cast_fp16")];
+            tensor<fp16, [1, 20, 1, 1536]> obj_23_cast_fp16 = softmax(axis = var_82, x = mh_w_7_cast_fp16)[name = string("obj_23_cast_fp16")];
+            tensor<int32, [4]> var_269 = const()[name = string("op_269"), val = tensor<int32, [4]>([1, 20, 64, -1])];
+            tensor<fp16, [1, 20, 64, 1536]> var_270_cast_fp16 = reshape(shape = var_269, x = obj_19_cast_fp16)[name = string("op_270_cast_fp16")];
+            bool attn_3_transpose_x_0 = const()[name = string("attn_3_transpose_x_0"), val = bool(false)];
+            bool attn_3_transpose_y_0 = const()[name = string("attn_3_transpose_y_0"), val = bool(true)];
+            tensor<fp16, [1, 20, 64, 1]> attn_3_cast_fp16 = matmul(transpose_x = attn_3_transpose_x_0, transpose_y = attn_3_transpose_y_0, x = var_270_cast_fp16, y = obj_23_cast_fp16)[name = string("attn_3_cast_fp16")];
+            tensor<int32, [4]> var_273 = const()[name = string("op_273"), val = tensor<int32, [4]>([1, 1280, 1, -1])];
+            tensor<fp16, [1, 1280, 1, 1]> input_3_cast_fp16 = reshape(shape = var_273, x = attn_3_cast_fp16)[name = string("input_3_cast_fp16")];
+            string var_283_pad_type_0 = const()[name = string("op_283_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_283_strides_0 = const()[name = string("op_283_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_283_pad_0 = const()[name = string("op_283_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_283_dilations_0 = const()[name = string("op_283_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_283_groups_0 = const()[name = string("op_283_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_0_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(139393088))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(140212352))))[name = string("layers_0_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1280]> layers_0_encoder_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_0_encoder_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(140212480)))];
+            tensor<fp16, [1, 1280, 1, 1]> var_283_cast_fp16 = conv(bias = layers_0_encoder_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_283_dilations_0, groups = var_283_groups_0, pad = var_283_pad_0, pad_type = var_283_pad_type_0, strides = var_283_strides_0, weight = layers_0_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_3_cast_fp16)[name = string("op_283_cast_fp16")];
+            string var_289_pad_type_0 = const()[name = string("op_289_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_289_strides_0 = const()[name = string("op_289_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_289_pad_0 = const()[name = string("op_289_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_289_dilations_0 = const()[name = string("op_289_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_289_groups_0 = const()[name = string("op_289_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_0_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(140227264))), nonzero_data = tensor<fp16, [6041]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(140215104))))[name = string("layers_0_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1]> var_289_cast_fp16 = conv(dilations = var_289_dilations_0, groups = var_289_groups_0, pad = var_289_pad_0, pad_type = var_289_pad_type_0, strides = var_289_strides_0, weight = layers_0_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_3_cast_fp16)[name = string("op_289_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1]> obj_21_cast_fp16 = add(x = var_283_cast_fp16, y = var_289_cast_fp16)[name = string("obj_21_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1]> inputs_5_cast_fp16 = add(x = inputs_3_cast_fp16, y = obj_21_cast_fp16)[name = string("inputs_5_cast_fp16")];
+            tensor<int32, [1]> out_5_axes_0 = const()[name = string("out_5_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_300_to_fp16 = const()[name = string("op_300_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1]> out_5_cast_fp16 = layer_norm(axes = out_5_axes_0, epsilon = var_300_to_fp16, x = inputs_5_cast_fp16)[name = string("out_5_cast_fp16")];
+            tensor<fp16, [1280]> input_5_gamma_0_to_fp16 = const()[name = string("input_5_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(140432128)))];
+            tensor<fp16, [1280]> input_5_beta_0_to_fp16 = const()[name = string("input_5_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(140434752)))];
+            fp16 input_5_epsilon_0_to_fp16 = const()[name = string("input_5_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1]> input_5_cast_fp16 = batch_norm(beta = input_5_beta_0_to_fp16, epsilon = input_5_epsilon_0_to_fp16, gamma = input_5_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_5_cast_fp16)[name = string("input_5_cast_fp16")];
+            string var_318_pad_type_0 = const()[name = string("op_318_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_318_strides_0 = const()[name = string("op_318_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_318_pad_0 = const()[name = string("op_318_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_318_dilations_0 = const()[name = string("op_318_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_318_groups_0 = const()[name = string("op_318_groups_0"), val = int32(1)];
+            tensor<fp16, [5120, 1280, 1, 1]> layers_0_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [5120, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(140437376))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(143714240))))[name = string("layers_0_fc1_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [5120]> layers_0_fc1_inlier_module_bias_to_fp16 = const()[name = string("layers_0_fc1_inlier_module_bias_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(143714368)))];
+            tensor<fp16, [1, 5120, 1, 1]> var_318_cast_fp16 = conv(bias = layers_0_fc1_inlier_module_bias_to_fp16, dilations = var_318_dilations_0, groups = var_318_groups_0, pad = var_318_pad_0, pad_type = var_318_pad_type_0, strides = var_318_strides_0, weight = layers_0_fc1_inlier_module_weight_to_fp16_palettized, x = input_5_cast_fp16)[name = string("op_318_cast_fp16")];
+            string var_324_pad_type_0 = const()[name = string("op_324_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_324_strides_0 = const()[name = string("op_324_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_324_pad_0 = const()[name = string("op_324_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_324_dilations_0 = const()[name = string("op_324_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_324_groups_0 = const()[name = string("op_324_groups_0"), val = int32(1)];
+            tensor<fp16, [5120, 1280, 1, 1]> layers_0_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [5120, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(143826240))), nonzero_data = tensor<fp16, [50752]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(143724672))))[name = string("layers_0_fc1_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 5120, 1, 1]> var_324_cast_fp16 = conv(dilations = var_324_dilations_0, groups = var_324_groups_0, pad = var_324_pad_0, pad_type = var_324_pad_type_0, strides = var_324_strides_0, weight = layers_0_fc1_outlier_module_weight_to_fp16_sparsified, x = input_5_cast_fp16)[name = string("op_324_cast_fp16")];
+            tensor<fp16, [1, 5120, 1, 1]> input_7_cast_fp16 = add(x = var_318_cast_fp16, y = var_324_cast_fp16)[name = string("input_7_cast_fp16")];
+            string input_9_mode_0 = const()[name = string("input_9_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 5120, 1, 1]> input_9_cast_fp16 = gelu(mode = input_9_mode_0, x = input_7_cast_fp16)[name = string("input_9_cast_fp16")];
+            string var_335_pad_type_0 = const()[name = string("op_335_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_335_strides_0 = const()[name = string("op_335_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_335_pad_0 = const()[name = string("op_335_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_335_dilations_0 = const()[name = string("op_335_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_335_groups_0 = const()[name = string("op_335_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 5120, 1, 1]> layers_0_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 5120, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(144645504))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(147922368))))[name = string("layers_0_fc2_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1280]> layers_0_fc2_inlier_module_bias_to_fp16 = const()[name = string("layers_0_fc2_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(147922496)))];
+            tensor<fp16, [1, 1280, 1, 1]> var_335_cast_fp16 = conv(bias = layers_0_fc2_inlier_module_bias_to_fp16, dilations = var_335_dilations_0, groups = var_335_groups_0, pad = var_335_pad_0, pad_type = var_335_pad_type_0, strides = var_335_strides_0, weight = layers_0_fc2_inlier_module_weight_to_fp16_palettized, x = input_9_cast_fp16)[name = string("op_335_cast_fp16")];
+            string var_341_pad_type_0 = const()[name = string("op_341_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_341_strides_0 = const()[name = string("op_341_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_341_pad_0 = const()[name = string("op_341_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_341_dilations_0 = const()[name = string("op_341_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_341_groups_0 = const()[name = string("op_341_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 5120, 1, 1]> layers_0_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 5120, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(148107648))), nonzero_data = tensor<fp16, [91213]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(147925120))))[name = string("layers_0_fc2_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1]> var_341_cast_fp16 = conv(dilations = var_341_dilations_0, groups = var_341_groups_0, pad = var_341_pad_0, pad_type = var_341_pad_type_0, strides = var_341_strides_0, weight = layers_0_fc2_outlier_module_weight_to_fp16_sparsified, x = input_9_cast_fp16)[name = string("op_341_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1]> hidden_states_3_cast_fp16 = add(x = var_335_cast_fp16, y = var_341_cast_fp16)[name = string("hidden_states_3_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1]> inputs_7_cast_fp16 = add(x = inputs_5_cast_fp16, y = hidden_states_3_cast_fp16)[name = string("inputs_7_cast_fp16")];
+            tensor<int32, [4]> obj_35_begin_0 = const()[name = string("obj_35_begin_0"), val = tensor<int32, [4]>([1, 0, 0, 0])];
+            tensor<int32, [4]> obj_35_end_0 = const()[name = string("obj_35_end_0"), val = tensor<int32, [4]>([2, 1280, 1, 1536])];
+            tensor<bool, [4]> obj_35_end_mask_0 = const()[name = string("obj_35_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<fp16, [1, 1280, 1, 1536]> obj_35_cast_fp16 = slice_by_index(begin = obj_35_begin_0, end = obj_35_end_0, end_mask = obj_35_end_mask_0, x = read_state_2)[name = string("obj_35_cast_fp16")];
+            tensor<int32, [4]> obj_37_begin_0 = const()[name = string("obj_37_begin_0"), val = tensor<int32, [4]>([1, 0, 0, 0])];
+            tensor<int32, [4]> obj_37_end_0 = const()[name = string("obj_37_end_0"), val = tensor<int32, [4]>([2, 1280, 1, 1536])];
+            tensor<bool, [4]> obj_37_end_mask_0 = const()[name = string("obj_37_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<fp16, [1, 1280, 1, 1536]> obj_37_cast_fp16 = slice_by_index(begin = obj_37_begin_0, end = obj_37_end_0, end_mask = obj_37_end_mask_0, x = read_state_3)[name = string("obj_37_cast_fp16")];
+            int32 var_363 = const()[name = string("op_363"), val = int32(3)];
+            tensor<int32, [1]> out_7_axes_0 = const()[name = string("out_7_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_388_to_fp16 = const()[name = string("op_388_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1]> out_7_cast_fp16 = layer_norm(axes = out_7_axes_0, epsilon = var_388_to_fp16, x = inputs_7_cast_fp16)[name = string("out_7_cast_fp16")];
+            tensor<fp16, [1280]> obj_25_gamma_0_to_fp16 = const()[name = string("obj_25_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(148926912)))];
+            tensor<fp16, [1280]> obj_25_beta_0_to_fp16 = const()[name = string("obj_25_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(148929536)))];
+            fp16 obj_25_epsilon_0_to_fp16 = const()[name = string("obj_25_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1]> obj_25_cast_fp16 = batch_norm(beta = obj_25_beta_0_to_fp16, epsilon = obj_25_epsilon_0_to_fp16, gamma = obj_25_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_7_cast_fp16)[name = string("obj_25_cast_fp16")];
+            string var_410_pad_type_0 = const()[name = string("op_410_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_410_strides_0 = const()[name = string("op_410_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_410_pad_0 = const()[name = string("op_410_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_410_dilations_0 = const()[name = string("op_410_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_410_groups_0 = const()[name = string("op_410_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_1_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(148932160))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(149751424))))[name = string("layers_1_self_attn_q_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1280]> layers_1_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_1_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(149751552)))];
+            tensor<fp16, [1, 1280, 1, 1]> var_410_cast_fp16 = conv(bias = layers_1_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_410_dilations_0, groups = var_410_groups_0, pad = var_410_pad_0, pad_type = var_410_pad_type_0, strides = var_410_strides_0, weight = layers_1_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_25_cast_fp16)[name = string("op_410_cast_fp16")];
+            string var_416_pad_type_0 = const()[name = string("op_416_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_416_strides_0 = const()[name = string("op_416_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_416_pad_0 = const()[name = string("op_416_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_416_dilations_0 = const()[name = string("op_416_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_416_groups_0 = const()[name = string("op_416_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_1_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(149814272))), nonzero_data = tensor<fp16, [29985]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(149754176))))[name = string("layers_1_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1]> var_416_cast_fp16 = conv(dilations = var_416_dilations_0, groups = var_416_groups_0, pad = var_416_pad_0, pad_type = var_416_pad_type_0, strides = var_416_strides_0, weight = layers_1_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_25_cast_fp16)[name = string("op_416_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1]> query_5_cast_fp16 = add(x = var_410_cast_fp16, y = var_416_cast_fp16)[name = string("query_5_cast_fp16")];
+            string var_425_pad_type_0 = const()[name = string("op_425_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_425_strides_0 = const()[name = string("op_425_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_425_pad_0 = const()[name = string("op_425_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_425_dilations_0 = const()[name = string("op_425_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_425_groups_0 = const()[name = string("op_425_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_1_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(150019136))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(150838400))))[name = string("layers_1_self_attn_k_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1, 1280, 1, 1]> var_425_cast_fp16 = conv(dilations = var_425_dilations_0, groups = var_425_groups_0, pad = var_425_pad_0, pad_type = var_425_pad_type_0, strides = var_425_strides_0, weight = layers_1_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_25_cast_fp16)[name = string("op_425_cast_fp16")];
+            string var_431_pad_type_0 = const()[name = string("op_431_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_431_strides_0 = const()[name = string("op_431_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_431_pad_0 = const()[name = string("op_431_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_431_dilations_0 = const()[name = string("op_431_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_431_groups_0 = const()[name = string("op_431_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_1_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(150885184))), nonzero_data = tensor<fp16, [23287]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(150838528))))[name = string("layers_1_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1]> var_431_cast_fp16 = conv(dilations = var_431_dilations_0, groups = var_431_groups_0, pad = var_431_pad_0, pad_type = var_431_pad_type_0, strides = var_431_strides_0, weight = layers_1_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_25_cast_fp16)[name = string("op_431_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1]> current_key_3_cast_fp16 = add(x = var_425_cast_fp16, y = var_431_cast_fp16)[name = string("current_key_3_cast_fp16")];
+            string var_441_pad_type_0 = const()[name = string("op_441_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_441_strides_0 = const()[name = string("op_441_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_441_pad_0 = const()[name = string("op_441_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_441_dilations_0 = const()[name = string("op_441_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_441_groups_0 = const()[name = string("op_441_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_1_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(151090048))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(151909312))))[name = string("layers_1_self_attn_v_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1280]> layers_1_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_1_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(151909440)))];
+            tensor<fp16, [1, 1280, 1, 1]> var_441_cast_fp16 = conv(bias = layers_1_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_441_dilations_0, groups = var_441_groups_0, pad = var_441_pad_0, pad_type = var_441_pad_type_0, strides = var_441_strides_0, weight = layers_1_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_25_cast_fp16)[name = string("op_441_cast_fp16")];
+            string var_447_pad_type_0 = const()[name = string("op_447_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_447_strides_0 = const()[name = string("op_447_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_447_pad_0 = const()[name = string("op_447_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_447_dilations_0 = const()[name = string("op_447_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_447_groups_0 = const()[name = string("op_447_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_1_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(151934720))), nonzero_data = tensor<fp16, [11267]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(151912064))))[name = string("layers_1_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1]> var_447_cast_fp16 = conv(dilations = var_447_dilations_0, groups = var_447_groups_0, pad = var_447_pad_0, pad_type = var_447_pad_type_0, strides = var_447_strides_0, weight = layers_1_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_25_cast_fp16)[name = string("op_447_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1]> current_value_3_cast_fp16 = add(x = var_441_cast_fp16, y = var_447_cast_fp16)[name = string("current_value_3_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 448]> var_453_cast_fp16 = mul(x = current_key_3_cast_fp16, y = var_170_cast_fp16)[name = string("op_453_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 448]> key_3_cast_fp16 = add(x = var_55_cast_fp16_1, y = var_453_cast_fp16)[name = string("key_3_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 448]> var_455_cast_fp16 = mul(x = current_value_3_cast_fp16, y = var_170_cast_fp16)[name = string("op_455_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 448]> value_3_cast_fp16 = add(x = var_62_cast_fp16_1, y = var_455_cast_fp16)[name = string("value_3_cast_fp16")];
+            tensor<int32, [4]> var_458 = const()[name = string("op_458"), val = tensor<int32, [4]>([1, 20, 64, -1])];
+            tensor<fp16, [1, 20, 64, 1]> mh_q_5_cast_fp16 = reshape(shape = var_458, x = query_5_cast_fp16)[name = string("mh_q_5_cast_fp16")];
+            fp16 var_460_to_fp16 = const()[name = string("op_460_to_fp16"), val = fp16(0x1p-3)];
+            tensor<fp16, [1, 20, 64, 1]> var_461_cast_fp16 = mul(x = mh_q_5_cast_fp16, y = var_460_to_fp16)[name = string("op_461_cast_fp16")];
+            tensor<int32, [4]> var_462 = const()[name = string("op_462"), val = tensor<int32, [4]>([1, 20, 64, -1])];
+            tensor<fp16, [1, 20, 64, 448]> var_463_cast_fp16 = reshape(shape = var_462, x = key_3_cast_fp16)[name = string("op_463_cast_fp16")];
+            bool mh_w_9_transpose_x_0 = const()[name = string("mh_w_9_transpose_x_0"), val = bool(true)];
+            bool mh_w_9_transpose_y_0 = const()[name = string("mh_w_9_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, 1, 448]> mh_w_9_cast_fp16 = matmul(transpose_x = mh_w_9_transpose_x_0, transpose_y = mh_w_9_transpose_y_0, x = var_461_cast_fp16, y = var_463_cast_fp16)[name = string("mh_w_9_cast_fp16")];
+            tensor<fp16, [1, 20, 1, 448]> mh_w_11_cast_fp16 = add(x = mh_w_9_cast_fp16, y = var_187_cast_fp16)[name = string("mh_w_11_cast_fp16")];
+            tensor<fp16, [1, 20, 1, 448]> var_471_cast_fp16 = softmax(axis = var_363, x = mh_w_11_cast_fp16)[name = string("op_471_cast_fp16")];
+            tensor<int32, [4]> var_472 = const()[name = string("op_472"), val = tensor<int32, [4]>([1, 20, 64, -1])];
+            tensor<fp16, [1, 20, 64, 448]> var_473_cast_fp16 = reshape(shape = var_472, x = value_3_cast_fp16)[name = string("op_473_cast_fp16")];
+            bool attn_5_transpose_x_0 = const()[name = string("attn_5_transpose_x_0"), val = bool(false)];
+            bool attn_5_transpose_y_0 = const()[name = string("attn_5_transpose_y_0"), val = bool(true)];
+            tensor<fp16, [1, 20, 64, 1]> attn_5_cast_fp16 = matmul(transpose_x = attn_5_transpose_x_0, transpose_y = attn_5_transpose_y_0, x = var_473_cast_fp16, y = var_471_cast_fp16)[name = string("attn_5_cast_fp16")];
+            tensor<int32, [4]> var_476 = const()[name = string("op_476"), val = tensor<int32, [4]>([1, 1280, 1, -1])];
+            tensor<fp16, [1, 1280, 1, 1]> input_11_cast_fp16 = reshape(shape = var_476, x = attn_5_cast_fp16)[name = string("input_11_cast_fp16")];
+            string var_486_pad_type_0 = const()[name = string("op_486_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_486_strides_0 = const()[name = string("op_486_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_486_pad_0 = const()[name = string("op_486_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_486_dilations_0 = const()[name = string("op_486_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_486_groups_0 = const()[name = string("op_486_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_1_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(152139584))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(152958848))))[name = string("layers_1_self_attn_o_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1280]> layers_1_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_1_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(152958976)))];
+            tensor<fp16, [1, 1280, 1, 1]> var_486_cast_fp16 = conv(bias = layers_1_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_486_dilations_0, groups = var_486_groups_0, pad = var_486_pad_0, pad_type = var_486_pad_type_0, strides = var_486_strides_0, weight = layers_1_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_11_cast_fp16)[name = string("op_486_cast_fp16")];
+            string var_492_pad_type_0 = const()[name = string("op_492_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_492_strides_0 = const()[name = string("op_492_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_492_pad_0 = const()[name = string("op_492_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_492_dilations_0 = const()[name = string("op_492_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_492_groups_0 = const()[name = string("op_492_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_1_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(152986048))), nonzero_data = tensor<fp16, [12187]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(152961600))))[name = string("layers_1_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1]> var_492_cast_fp16 = conv(dilations = var_492_dilations_0, groups = var_492_groups_0, pad = var_492_pad_0, pad_type = var_492_pad_type_0, strides = var_492_strides_0, weight = layers_1_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_11_cast_fp16)[name = string("op_492_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1]> obj_31_cast_fp16 = add(x = var_486_cast_fp16, y = var_492_cast_fp16)[name = string("obj_31_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1]> inputs_9_cast_fp16 = add(x = inputs_7_cast_fp16, y = obj_31_cast_fp16)[name = string("inputs_9_cast_fp16")];
+            tensor<int32, [1]> out_9_axes_0 = const()[name = string("out_9_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_507_to_fp16 = const()[name = string("op_507_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1]> out_9_cast_fp16 = layer_norm(axes = out_9_axes_0, epsilon = var_507_to_fp16, x = inputs_9_cast_fp16)[name = string("out_9_cast_fp16")];
+            tensor<fp16, [1280]> obj_33_gamma_0_to_fp16 = const()[name = string("obj_33_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(153190912)))];
+            tensor<fp16, [1280]> obj_33_beta_0_to_fp16 = const()[name = string("obj_33_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(153193536)))];
+            fp16 obj_33_epsilon_0_to_fp16 = const()[name = string("obj_33_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1]> obj_33_cast_fp16 = batch_norm(beta = obj_33_beta_0_to_fp16, epsilon = obj_33_epsilon_0_to_fp16, gamma = obj_33_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_9_cast_fp16)[name = string("obj_33_cast_fp16")];
+            string var_527_pad_type_0 = const()[name = string("op_527_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_527_strides_0 = const()[name = string("op_527_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_527_pad_0 = const()[name = string("op_527_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_527_dilations_0 = const()[name = string("op_527_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_527_groups_0 = const()[name = string("op_527_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_1_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(153196160))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(154015424))))[name = string("layers_1_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1280]> layers_1_encoder_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_1_encoder_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(154015552)))];
+            tensor<fp16, [1, 1280, 1, 1]> var_527_cast_fp16 = conv(bias = layers_1_encoder_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_527_dilations_0, groups = var_527_groups_0, pad = var_527_pad_0, pad_type = var_527_pad_type_0, strides = var_527_strides_0, weight = layers_1_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_33_cast_fp16)[name = string("op_527_cast_fp16")];
+            string var_533_pad_type_0 = const()[name = string("op_533_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_533_strides_0 = const()[name = string("op_533_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_533_pad_0 = const()[name = string("op_533_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_533_dilations_0 = const()[name = string("op_533_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_533_groups_0 = const()[name = string("op_533_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_1_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(154061248))), nonzero_data = tensor<fp16, [21483]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(154018176))))[name = string("layers_1_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1]> var_533_cast_fp16 = conv(dilations = var_533_dilations_0, groups = var_533_groups_0, pad = var_533_pad_0, pad_type = var_533_pad_type_0, strides = var_533_strides_0, weight = layers_1_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_33_cast_fp16)[name = string("op_533_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1]> query_7_cast_fp16 = add(x = var_527_cast_fp16, y = var_533_cast_fp16)[name = string("query_7_cast_fp16")];
+            tensor<int32, [4]> var_536 = const()[name = string("op_536"), val = tensor<int32, [4]>([1, 20, 64, -1])];
+            tensor<fp16, [1, 20, 64, 1]> mh_q_7_cast_fp16 = reshape(shape = var_536, x = query_7_cast_fp16)[name = string("mh_q_7_cast_fp16")];
+            fp16 var_538_to_fp16 = const()[name = string("op_538_to_fp16"), val = fp16(0x1p-3)];
+            tensor<fp16, [1, 20, 64, 1]> var_539_cast_fp16 = mul(x = mh_q_7_cast_fp16, y = var_538_to_fp16)[name = string("op_539_cast_fp16")];
+            tensor<int32, [4]> var_540 = const()[name = string("op_540"), val = tensor<int32, [4]>([1, 20, 64, -1])];
+            tensor<fp16, [1, 20, 64, 1536]> var_541_cast_fp16 = reshape(shape = var_540, x = obj_35_cast_fp16)[name = string("op_541_cast_fp16")];
+            bool mh_w_13_transpose_x_0 = const()[name = string("mh_w_13_transpose_x_0"), val = bool(true)];
+            bool mh_w_13_transpose_y_0 = const()[name = string("mh_w_13_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, 1, 1536]> mh_w_13_cast_fp16 = matmul(transpose_x = mh_w_13_transpose_x_0, transpose_y = mh_w_13_transpose_y_0, x = var_539_cast_fp16, y = var_541_cast_fp16)[name = string("mh_w_13_cast_fp16")];
+            tensor<fp16, [1, 20, 1, 1536]> mh_w_15_cast_fp16 = add(x = mh_w_13_cast_fp16, y = var_265_cast_fp16)[name = string("mh_w_15_cast_fp16")];
+            tensor<fp16, [1, 20, 1, 1536]> obj_41_cast_fp16 = softmax(axis = var_363, x = mh_w_15_cast_fp16)[name = string("obj_41_cast_fp16")];
+            tensor<int32, [4]> var_550 = const()[name = string("op_550"), val = tensor<int32, [4]>([1, 20, 64, -1])];
+            tensor<fp16, [1, 20, 64, 1536]> var_551_cast_fp16 = reshape(shape = var_550, x = obj_37_cast_fp16)[name = string("op_551_cast_fp16")];
+            bool attn_7_transpose_x_0 = const()[name = string("attn_7_transpose_x_0"), val = bool(false)];
+            bool attn_7_transpose_y_0 = const()[name = string("attn_7_transpose_y_0"), val = bool(true)];
+            tensor<fp16, [1, 20, 64, 1]> attn_7_cast_fp16 = matmul(transpose_x = attn_7_transpose_x_0, transpose_y = attn_7_transpose_y_0, x = var_551_cast_fp16, y = obj_41_cast_fp16)[name = string("attn_7_cast_fp16")];
+            tensor<int32, [4]> var_554 = const()[name = string("op_554"), val = tensor<int32, [4]>([1, 1280, 1, -1])];
+            tensor<fp16, [1, 1280, 1, 1]> input_13_cast_fp16 = reshape(shape = var_554, x = attn_7_cast_fp16)[name = string("input_13_cast_fp16")];
+            string var_564_pad_type_0 = const()[name = string("op_564_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_564_strides_0 = const()[name = string("op_564_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_564_pad_0 = const()[name = string("op_564_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_564_dilations_0 = const()[name = string("op_564_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_564_groups_0 = const()[name = string("op_564_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_1_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(154266112))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(155085376))))[name = string("layers_1_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1280]> layers_1_encoder_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_1_encoder_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(155085504)))];
+            tensor<fp16, [1, 1280, 1, 1]> var_564_cast_fp16 = conv(bias = layers_1_encoder_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_564_dilations_0, groups = var_564_groups_0, pad = var_564_pad_0, pad_type = var_564_pad_type_0, strides = var_564_strides_0, weight = layers_1_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_13_cast_fp16)[name = string("op_564_cast_fp16")];
+            string var_570_pad_type_0 = const()[name = string("op_570_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_570_strides_0 = const()[name = string("op_570_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_570_pad_0 = const()[name = string("op_570_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_570_dilations_0 = const()[name = string("op_570_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_570_groups_0 = const()[name = string("op_570_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_1_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(155098496))), nonzero_data = tensor<fp16, [5143]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(155088128))))[name = string("layers_1_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1]> var_570_cast_fp16 = conv(dilations = var_570_dilations_0, groups = var_570_groups_0, pad = var_570_pad_0, pad_type = var_570_pad_type_0, strides = var_570_strides_0, weight = layers_1_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_13_cast_fp16)[name = string("op_570_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1]> obj_39_cast_fp16 = add(x = var_564_cast_fp16, y = var_570_cast_fp16)[name = string("obj_39_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1]> inputs_11_cast_fp16 = add(x = inputs_9_cast_fp16, y = obj_39_cast_fp16)[name = string("inputs_11_cast_fp16")];
+            tensor<int32, [1]> out_11_axes_0 = const()[name = string("out_11_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_581_to_fp16 = const()[name = string("op_581_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1]> out_11_cast_fp16 = layer_norm(axes = out_11_axes_0, epsilon = var_581_to_fp16, x = inputs_11_cast_fp16)[name = string("out_11_cast_fp16")];
+            tensor<fp16, [1280]> input_15_gamma_0_to_fp16 = const()[name = string("input_15_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(155303360)))];
+            tensor<fp16, [1280]> input_15_beta_0_to_fp16 = const()[name = string("input_15_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(155305984)))];
+            fp16 input_15_epsilon_0_to_fp16 = const()[name = string("input_15_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1]> input_15_cast_fp16 = batch_norm(beta = input_15_beta_0_to_fp16, epsilon = input_15_epsilon_0_to_fp16, gamma = input_15_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_11_cast_fp16)[name = string("input_15_cast_fp16")];
+            string var_599_pad_type_0 = const()[name = string("op_599_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_599_strides_0 = const()[name = string("op_599_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_599_pad_0 = const()[name = string("op_599_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_599_dilations_0 = const()[name = string("op_599_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_599_groups_0 = const()[name = string("op_599_groups_0"), val = int32(1)];
+            tensor<fp16, [5120, 1280, 1, 1]> layers_1_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [5120, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(155308608))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(158585472))))[name = string("layers_1_fc1_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [5120]> layers_1_fc1_inlier_module_bias_to_fp16 = const()[name = string("layers_1_fc1_inlier_module_bias_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(158585600)))];
+            tensor<fp16, [1, 5120, 1, 1]> var_599_cast_fp16 = conv(bias = layers_1_fc1_inlier_module_bias_to_fp16, dilations = var_599_dilations_0, groups = var_599_groups_0, pad = var_599_pad_0, pad_type = var_599_pad_type_0, strides = var_599_strides_0, weight = layers_1_fc1_inlier_module_weight_to_fp16_palettized, x = input_15_cast_fp16)[name = string("op_599_cast_fp16")];
+            string var_605_pad_type_0 = const()[name = string("op_605_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_605_strides_0 = const()[name = string("op_605_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_605_pad_0 = const()[name = string("op_605_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_605_dilations_0 = const()[name = string("op_605_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_605_groups_0 = const()[name = string("op_605_groups_0"), val = int32(1)];
+            tensor<fp16, [5120, 1280, 1, 1]> layers_1_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [5120, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(158681152))), nonzero_data = tensor<fp16, [42562]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(158595904))))[name = string("layers_1_fc1_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 5120, 1, 1]> var_605_cast_fp16 = conv(dilations = var_605_dilations_0, groups = var_605_groups_0, pad = var_605_pad_0, pad_type = var_605_pad_type_0, strides = var_605_strides_0, weight = layers_1_fc1_outlier_module_weight_to_fp16_sparsified, x = input_15_cast_fp16)[name = string("op_605_cast_fp16")];
+            tensor<fp16, [1, 5120, 1, 1]> input_17_cast_fp16 = add(x = var_599_cast_fp16, y = var_605_cast_fp16)[name = string("input_17_cast_fp16")];
+            string input_19_mode_0 = const()[name = string("input_19_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 5120, 1, 1]> input_19_cast_fp16 = gelu(mode = input_19_mode_0, x = input_17_cast_fp16)[name = string("input_19_cast_fp16")];
+            string var_616_pad_type_0 = const()[name = string("op_616_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_616_strides_0 = const()[name = string("op_616_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_616_pad_0 = const()[name = string("op_616_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_616_dilations_0 = const()[name = string("op_616_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_616_groups_0 = const()[name = string("op_616_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 5120, 1, 1]> layers_1_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 5120, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(159500416))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(162777280))))[name = string("layers_1_fc2_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1280]> layers_1_fc2_inlier_module_bias_to_fp16 = const()[name = string("layers_1_fc2_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(162777408)))];
+            tensor<fp16, [1, 1280, 1, 1]> var_616_cast_fp16 = conv(bias = layers_1_fc2_inlier_module_bias_to_fp16, dilations = var_616_dilations_0, groups = var_616_groups_0, pad = var_616_pad_0, pad_type = var_616_pad_type_0, strides = var_616_strides_0, weight = layers_1_fc2_inlier_module_weight_to_fp16_palettized, x = input_19_cast_fp16)[name = string("op_616_cast_fp16")];
+            string var_622_pad_type_0 = const()[name = string("op_622_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_622_strides_0 = const()[name = string("op_622_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_622_pad_0 = const()[name = string("op_622_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_622_dilations_0 = const()[name = string("op_622_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_622_groups_0 = const()[name = string("op_622_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 5120, 1, 1]> layers_1_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 5120, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(162868032))), nonzero_data = tensor<fp16, [43939]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(162780032))))[name = string("layers_1_fc2_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1]> var_622_cast_fp16 = conv(dilations = var_622_dilations_0, groups = var_622_groups_0, pad = var_622_pad_0, pad_type = var_622_pad_type_0, strides = var_622_strides_0, weight = layers_1_fc2_outlier_module_weight_to_fp16_sparsified, x = input_19_cast_fp16)[name = string("op_622_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1]> hidden_states_5_cast_fp16 = add(x = var_616_cast_fp16, y = var_622_cast_fp16)[name = string("hidden_states_5_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1]> inputs_13_cast_fp16 = add(x = inputs_11_cast_fp16, y = hidden_states_5_cast_fp16)[name = string("inputs_13_cast_fp16")];
+            tensor<int32, [4]> obj_53_begin_0 = const()[name = string("obj_53_begin_0"), val = tensor<int32, [4]>([2, 0, 0, 0])];
+            tensor<int32, [4]> obj_53_end_0 = const()[name = string("obj_53_end_0"), val = tensor<int32, [4]>([3, 1280, 1, 1536])];
+            tensor<bool, [4]> obj_53_end_mask_0 = const()[name = string("obj_53_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<fp16, [1, 1280, 1, 1536]> obj_53_cast_fp16 = slice_by_index(begin = obj_53_begin_0, end = obj_53_end_0, end_mask = obj_53_end_mask_0, x = read_state_2)[name = string("obj_53_cast_fp16")];
+            tensor<int32, [4]> obj_55_begin_0 = const()[name = string("obj_55_begin_0"), val = tensor<int32, [4]>([2, 0, 0, 0])];
+            tensor<int32, [4]> obj_55_end_0 = const()[name = string("obj_55_end_0"), val = tensor<int32, [4]>([3, 1280, 1, 1536])];
+            tensor<bool, [4]> obj_55_end_mask_0 = const()[name = string("obj_55_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<fp16, [1, 1280, 1, 1536]> obj_55_cast_fp16 = slice_by_index(begin = obj_55_begin_0, end = obj_55_end_0, end_mask = obj_55_end_mask_0, x = read_state_3)[name = string("obj_55_cast_fp16")];
+            int32 var_644 = const()[name = string("op_644"), val = int32(3)];
+            tensor<int32, [1]> out_13_axes_0 = const()[name = string("out_13_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_669_to_fp16 = const()[name = string("op_669_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1]> out_13_cast_fp16 = layer_norm(axes = out_13_axes_0, epsilon = var_669_to_fp16, x = inputs_13_cast_fp16)[name = string("out_13_cast_fp16")];
+            tensor<fp16, [1280]> obj_43_gamma_0_to_fp16 = const()[name = string("obj_43_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(163687296)))];
+            tensor<fp16, [1280]> obj_43_beta_0_to_fp16 = const()[name = string("obj_43_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(163689920)))];
+            fp16 obj_43_epsilon_0_to_fp16 = const()[name = string("obj_43_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1]> obj_43_cast_fp16 = batch_norm(beta = obj_43_beta_0_to_fp16, epsilon = obj_43_epsilon_0_to_fp16, gamma = obj_43_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_13_cast_fp16)[name = string("obj_43_cast_fp16")];
+            string var_691_pad_type_0 = const()[name = string("op_691_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_691_strides_0 = const()[name = string("op_691_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_691_pad_0 = const()[name = string("op_691_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_691_dilations_0 = const()[name = string("op_691_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_691_groups_0 = const()[name = string("op_691_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_2_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(163692544))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(164511808))))[name = string("layers_2_self_attn_q_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1280]> layers_2_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_2_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(164511936)))];
+            tensor<fp16, [1, 1280, 1, 1]> var_691_cast_fp16 = conv(bias = layers_2_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_691_dilations_0, groups = var_691_groups_0, pad = var_691_pad_0, pad_type = var_691_pad_type_0, strides = var_691_strides_0, weight = layers_2_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_43_cast_fp16)[name = string("op_691_cast_fp16")];
+            string var_697_pad_type_0 = const()[name = string("op_697_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_697_strides_0 = const()[name = string("op_697_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_697_pad_0 = const()[name = string("op_697_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_697_dilations_0 = const()[name = string("op_697_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_697_groups_0 = const()[name = string("op_697_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_2_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(164546816))), nonzero_data = tensor<fp16, [16094]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(164514560))))[name = string("layers_2_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1]> var_697_cast_fp16 = conv(dilations = var_697_dilations_0, groups = var_697_groups_0, pad = var_697_pad_0, pad_type = var_697_pad_type_0, strides = var_697_strides_0, weight = layers_2_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_43_cast_fp16)[name = string("op_697_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1]> query_9_cast_fp16 = add(x = var_691_cast_fp16, y = var_697_cast_fp16)[name = string("query_9_cast_fp16")];
+            string var_706_pad_type_0 = const()[name = string("op_706_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_706_strides_0 = const()[name = string("op_706_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_706_pad_0 = const()[name = string("op_706_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_706_dilations_0 = const()[name = string("op_706_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_706_groups_0 = const()[name = string("op_706_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_2_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(164751680))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(165570944))))[name = string("layers_2_self_attn_k_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1, 1280, 1, 1]> var_706_cast_fp16 = conv(dilations = var_706_dilations_0, groups = var_706_groups_0, pad = var_706_pad_0, pad_type = var_706_pad_type_0, strides = var_706_strides_0, weight = layers_2_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_43_cast_fp16)[name = string("op_706_cast_fp16")];
+            string var_712_pad_type_0 = const()[name = string("op_712_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_712_strides_0 = const()[name = string("op_712_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_712_pad_0 = const()[name = string("op_712_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_712_dilations_0 = const()[name = string("op_712_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_712_groups_0 = const()[name = string("op_712_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_2_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(165608576))), nonzero_data = tensor<fp16, [18690]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(165571072))))[name = string("layers_2_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1]> var_712_cast_fp16 = conv(dilations = var_712_dilations_0, groups = var_712_groups_0, pad = var_712_pad_0, pad_type = var_712_pad_type_0, strides = var_712_strides_0, weight = layers_2_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_43_cast_fp16)[name = string("op_712_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1]> current_key_5_cast_fp16 = add(x = var_706_cast_fp16, y = var_712_cast_fp16)[name = string("current_key_5_cast_fp16")];
+            string var_722_pad_type_0 = const()[name = string("op_722_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_722_strides_0 = const()[name = string("op_722_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_722_pad_0 = const()[name = string("op_722_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_722_dilations_0 = const()[name = string("op_722_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_722_groups_0 = const()[name = string("op_722_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_2_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(165813440))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(166632704))))[name = string("layers_2_self_attn_v_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1280]> layers_2_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_2_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(166632832)))];
+            tensor<fp16, [1, 1280, 1, 1]> var_722_cast_fp16 = conv(bias = layers_2_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_722_dilations_0, groups = var_722_groups_0, pad = var_722_pad_0, pad_type = var_722_pad_type_0, strides = var_722_strides_0, weight = layers_2_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_43_cast_fp16)[name = string("op_722_cast_fp16")];
+            string var_728_pad_type_0 = const()[name = string("op_728_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_728_strides_0 = const()[name = string("op_728_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_728_pad_0 = const()[name = string("op_728_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_728_dilations_0 = const()[name = string("op_728_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_728_groups_0 = const()[name = string("op_728_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_2_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(166648384))), nonzero_data = tensor<fp16, [6431]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(166635456))))[name = string("layers_2_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1]> var_728_cast_fp16 = conv(dilations = var_728_dilations_0, groups = var_728_groups_0, pad = var_728_pad_0, pad_type = var_728_pad_type_0, strides = var_728_strides_0, weight = layers_2_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_43_cast_fp16)[name = string("op_728_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1]> current_value_5_cast_fp16 = add(x = var_722_cast_fp16, y = var_728_cast_fp16)[name = string("current_value_5_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 448]> var_734_cast_fp16 = mul(x = current_key_5_cast_fp16, y = var_170_cast_fp16)[name = string("op_734_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 448]> key_5_cast_fp16 = add(x = var_55_cast_fp16_2, y = var_734_cast_fp16)[name = string("key_5_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 448]> var_736_cast_fp16 = mul(x = current_value_5_cast_fp16, y = var_170_cast_fp16)[name = string("op_736_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 448]> value_5_cast_fp16 = add(x = var_62_cast_fp16_2, y = var_736_cast_fp16)[name = string("value_5_cast_fp16")];
+            tensor<int32, [4]> var_739 = const()[name = string("op_739"), val = tensor<int32, [4]>([1, 20, 64, -1])];
+            tensor<fp16, [1, 20, 64, 1]> mh_q_9_cast_fp16 = reshape(shape = var_739, x = query_9_cast_fp16)[name = string("mh_q_9_cast_fp16")];
+            fp16 var_741_to_fp16 = const()[name = string("op_741_to_fp16"), val = fp16(0x1p-3)];
+            tensor<fp16, [1, 20, 64, 1]> var_742_cast_fp16 = mul(x = mh_q_9_cast_fp16, y = var_741_to_fp16)[name = string("op_742_cast_fp16")];
+            tensor<int32, [4]> var_743 = const()[name = string("op_743"), val = tensor<int32, [4]>([1, 20, 64, -1])];
+            tensor<fp16, [1, 20, 64, 448]> var_744_cast_fp16 = reshape(shape = var_743, x = key_5_cast_fp16)[name = string("op_744_cast_fp16")];
+            bool mh_w_17_transpose_x_0 = const()[name = string("mh_w_17_transpose_x_0"), val = bool(true)];
+            bool mh_w_17_transpose_y_0 = const()[name = string("mh_w_17_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, 1, 448]> mh_w_17_cast_fp16 = matmul(transpose_x = mh_w_17_transpose_x_0, transpose_y = mh_w_17_transpose_y_0, x = var_742_cast_fp16, y = var_744_cast_fp16)[name = string("mh_w_17_cast_fp16")];
+            tensor<fp16, [1, 20, 1, 448]> mh_w_19_cast_fp16 = add(x = mh_w_17_cast_fp16, y = var_187_cast_fp16)[name = string("mh_w_19_cast_fp16")];
+            tensor<fp16, [1, 20, 1, 448]> var_752_cast_fp16 = softmax(axis = var_644, x = mh_w_19_cast_fp16)[name = string("op_752_cast_fp16")];
+            tensor<int32, [4]> var_753 = const()[name = string("op_753"), val = tensor<int32, [4]>([1, 20, 64, -1])];
+            tensor<fp16, [1, 20, 64, 448]> var_754_cast_fp16 = reshape(shape = var_753, x = value_5_cast_fp16)[name = string("op_754_cast_fp16")];
+            bool attn_9_transpose_x_0 = const()[name = string("attn_9_transpose_x_0"), val = bool(false)];
+            bool attn_9_transpose_y_0 = const()[name = string("attn_9_transpose_y_0"), val = bool(true)];
+            tensor<fp16, [1, 20, 64, 1]> attn_9_cast_fp16 = matmul(transpose_x = attn_9_transpose_x_0, transpose_y = attn_9_transpose_y_0, x = var_754_cast_fp16, y = var_752_cast_fp16)[name = string("attn_9_cast_fp16")];
+            tensor<int32, [4]> var_757 = const()[name = string("op_757"), val = tensor<int32, [4]>([1, 1280, 1, -1])];
+            tensor<fp16, [1, 1280, 1, 1]> input_21_cast_fp16 = reshape(shape = var_757, x = attn_9_cast_fp16)[name = string("input_21_cast_fp16")];
+            string var_767_pad_type_0 = const()[name = string("op_767_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_767_strides_0 = const()[name = string("op_767_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_767_pad_0 = const()[name = string("op_767_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_767_dilations_0 = const()[name = string("op_767_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_767_groups_0 = const()[name = string("op_767_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_2_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(166853248))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(167672512))))[name = string("layers_2_self_attn_o_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1280]> layers_2_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_2_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(167672640)))];
+            tensor<fp16, [1, 1280, 1, 1]> var_767_cast_fp16 = conv(bias = layers_2_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_767_dilations_0, groups = var_767_groups_0, pad = var_767_pad_0, pad_type = var_767_pad_type_0, strides = var_767_strides_0, weight = layers_2_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_21_cast_fp16)[name = string("op_767_cast_fp16")];
+            string var_773_pad_type_0 = const()[name = string("op_773_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_773_strides_0 = const()[name = string("op_773_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_773_pad_0 = const()[name = string("op_773_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_773_dilations_0 = const()[name = string("op_773_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_773_groups_0 = const()[name = string("op_773_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_2_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(167686720))), nonzero_data = tensor<fp16, [5678]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(167675264))))[name = string("layers_2_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1]> var_773_cast_fp16 = conv(dilations = var_773_dilations_0, groups = var_773_groups_0, pad = var_773_pad_0, pad_type = var_773_pad_type_0, strides = var_773_strides_0, weight = layers_2_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_21_cast_fp16)[name = string("op_773_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1]> obj_49_cast_fp16 = add(x = var_767_cast_fp16, y = var_773_cast_fp16)[name = string("obj_49_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1]> inputs_15_cast_fp16 = add(x = inputs_13_cast_fp16, y = obj_49_cast_fp16)[name = string("inputs_15_cast_fp16")];
+            tensor<int32, [1]> out_15_axes_0 = const()[name = string("out_15_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_788_to_fp16 = const()[name = string("op_788_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1]> out_15_cast_fp16 = layer_norm(axes = out_15_axes_0, epsilon = var_788_to_fp16, x = inputs_15_cast_fp16)[name = string("out_15_cast_fp16")];
+            tensor<fp16, [1280]> obj_51_gamma_0_to_fp16 = const()[name = string("obj_51_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(167891584)))];
+            tensor<fp16, [1280]> obj_51_beta_0_to_fp16 = const()[name = string("obj_51_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(167894208)))];
+            fp16 obj_51_epsilon_0_to_fp16 = const()[name = string("obj_51_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1]> obj_51_cast_fp16 = batch_norm(beta = obj_51_beta_0_to_fp16, epsilon = obj_51_epsilon_0_to_fp16, gamma = obj_51_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_15_cast_fp16)[name = string("obj_51_cast_fp16")];
+            string var_808_pad_type_0 = const()[name = string("op_808_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_808_strides_0 = const()[name = string("op_808_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_808_pad_0 = const()[name = string("op_808_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_808_dilations_0 = const()[name = string("op_808_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_808_groups_0 = const()[name = string("op_808_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_2_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(167896832))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(168716096))))[name = string("layers_2_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1280]> layers_2_encoder_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_2_encoder_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(168716224)))];
+            tensor<fp16, [1, 1280, 1, 1]> var_808_cast_fp16 = conv(bias = layers_2_encoder_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_808_dilations_0, groups = var_808_groups_0, pad = var_808_pad_0, pad_type = var_808_pad_type_0, strides = var_808_strides_0, weight = layers_2_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_51_cast_fp16)[name = string("op_808_cast_fp16")];
+            string var_814_pad_type_0 = const()[name = string("op_814_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_814_strides_0 = const()[name = string("op_814_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_814_pad_0 = const()[name = string("op_814_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_814_dilations_0 = const()[name = string("op_814_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_814_groups_0 = const()[name = string("op_814_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_2_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(168746560))), nonzero_data = tensor<fp16, [13824]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(168718848))))[name = string("layers_2_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1]> var_814_cast_fp16 = conv(dilations = var_814_dilations_0, groups = var_814_groups_0, pad = var_814_pad_0, pad_type = var_814_pad_type_0, strides = var_814_strides_0, weight = layers_2_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_51_cast_fp16)[name = string("op_814_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1]> query_11_cast_fp16 = add(x = var_808_cast_fp16, y = var_814_cast_fp16)[name = string("query_11_cast_fp16")];
+            tensor<int32, [4]> var_817 = const()[name = string("op_817"), val = tensor<int32, [4]>([1, 20, 64, -1])];
+            tensor<fp16, [1, 20, 64, 1]> mh_q_11_cast_fp16 = reshape(shape = var_817, x = query_11_cast_fp16)[name = string("mh_q_11_cast_fp16")];
+            fp16 var_819_to_fp16 = const()[name = string("op_819_to_fp16"), val = fp16(0x1p-3)];
+            tensor<fp16, [1, 20, 64, 1]> var_820_cast_fp16 = mul(x = mh_q_11_cast_fp16, y = var_819_to_fp16)[name = string("op_820_cast_fp16")];
+            tensor<int32, [4]> var_821 = const()[name = string("op_821"), val = tensor<int32, [4]>([1, 20, 64, -1])];
+            tensor<fp16, [1, 20, 64, 1536]> var_822_cast_fp16 = reshape(shape = var_821, x = obj_53_cast_fp16)[name = string("op_822_cast_fp16")];
+            bool mh_w_21_transpose_x_0 = const()[name = string("mh_w_21_transpose_x_0"), val = bool(true)];
+            bool mh_w_21_transpose_y_0 = const()[name = string("mh_w_21_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, 1, 1536]> mh_w_21_cast_fp16 = matmul(transpose_x = mh_w_21_transpose_x_0, transpose_y = mh_w_21_transpose_y_0, x = var_820_cast_fp16, y = var_822_cast_fp16)[name = string("mh_w_21_cast_fp16")];
+            tensor<fp16, [1, 20, 1, 1536]> mh_w_23_cast_fp16 = add(x = mh_w_21_cast_fp16, y = var_265_cast_fp16)[name = string("mh_w_23_cast_fp16")];
+            tensor<fp16, [1, 20, 1, 1536]> obj_59_cast_fp16 = softmax(axis = var_644, x = mh_w_23_cast_fp16)[name = string("obj_59_cast_fp16")];
+            tensor<int32, [4]> var_831 = const()[name = string("op_831"), val = tensor<int32, [4]>([1, 20, 64, -1])];
+            tensor<fp16, [1, 20, 64, 1536]> var_832_cast_fp16 = reshape(shape = var_831, x = obj_55_cast_fp16)[name = string("op_832_cast_fp16")];
+            bool attn_11_transpose_x_0 = const()[name = string("attn_11_transpose_x_0"), val = bool(false)];
+            bool attn_11_transpose_y_0 = const()[name = string("attn_11_transpose_y_0"), val = bool(true)];
+            tensor<fp16, [1, 20, 64, 1]> attn_11_cast_fp16 = matmul(transpose_x = attn_11_transpose_x_0, transpose_y = attn_11_transpose_y_0, x = var_832_cast_fp16, y = obj_59_cast_fp16)[name = string("attn_11_cast_fp16")];
+            tensor<int32, [4]> var_835 = const()[name = string("op_835"), val = tensor<int32, [4]>([1, 1280, 1, -1])];
+            tensor<fp16, [1, 1280, 1, 1]> input_23_cast_fp16 = reshape(shape = var_835, x = attn_11_cast_fp16)[name = string("input_23_cast_fp16")];
+            string var_845_pad_type_0 = const()[name = string("op_845_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_845_strides_0 = const()[name = string("op_845_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_845_pad_0 = const()[name = string("op_845_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_845_dilations_0 = const()[name = string("op_845_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_845_groups_0 = const()[name = string("op_845_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_2_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(168951424))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(169770688))))[name = string("layers_2_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1280]> layers_2_encoder_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_2_encoder_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(169770816)))];
+            tensor<fp16, [1, 1280, 1, 1]> var_845_cast_fp16 = conv(bias = layers_2_encoder_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_845_dilations_0, groups = var_845_groups_0, pad = var_845_pad_0, pad_type = var_845_pad_type_0, strides = var_845_strides_0, weight = layers_2_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_23_cast_fp16)[name = string("op_845_cast_fp16")];
+            string var_851_pad_type_0 = const()[name = string("op_851_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_851_strides_0 = const()[name = string("op_851_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_851_pad_0 = const()[name = string("op_851_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_851_dilations_0 = const()[name = string("op_851_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_851_groups_0 = const()[name = string("op_851_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_2_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(169786432))), nonzero_data = tensor<fp16, [6438]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(169773440))))[name = string("layers_2_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1]> var_851_cast_fp16 = conv(dilations = var_851_dilations_0, groups = var_851_groups_0, pad = var_851_pad_0, pad_type = var_851_pad_type_0, strides = var_851_strides_0, weight = layers_2_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_23_cast_fp16)[name = string("op_851_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1]> obj_57_cast_fp16 = add(x = var_845_cast_fp16, y = var_851_cast_fp16)[name = string("obj_57_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1]> inputs_17_cast_fp16 = add(x = inputs_15_cast_fp16, y = obj_57_cast_fp16)[name = string("inputs_17_cast_fp16")];
+            tensor<int32, [1]> out_17_axes_0 = const()[name = string("out_17_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_865_to_fp16 = const()[name = string("op_865_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1]> out_17_cast_fp16 = layer_norm(axes = out_17_axes_0, epsilon = var_865_to_fp16, x = inputs_17_cast_fp16)[name = string("out_17_cast_fp16")];
+            tensor<fp16, [1280]> input_25_gamma_0_to_fp16 = const()[name = string("input_25_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(169991296)))];
+            tensor<fp16, [1280]> input_25_beta_0_to_fp16 = const()[name = string("input_25_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(169993920)))];
+            fp16 input_25_epsilon_0_to_fp16 = const()[name = string("input_25_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1]> input_25_cast_fp16 = batch_norm(beta = input_25_beta_0_to_fp16, epsilon = input_25_epsilon_0_to_fp16, gamma = input_25_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_17_cast_fp16)[name = string("input_25_cast_fp16")];
+            string var_883_pad_type_0 = const()[name = string("op_883_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_883_strides_0 = const()[name = string("op_883_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_883_pad_0 = const()[name = string("op_883_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_883_dilations_0 = const()[name = string("op_883_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_883_groups_0 = const()[name = string("op_883_groups_0"), val = int32(1)];
+            tensor<fp16, [5120, 1280, 1, 1]> layers_2_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [5120, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(169996544))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(173273408))))[name = string("layers_2_fc1_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [5120]> layers_2_fc1_inlier_module_bias_to_fp16 = const()[name = string("layers_2_fc1_inlier_module_bias_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(173273536)))];
+            tensor<fp16, [1, 5120, 1, 1]> var_883_cast_fp16 = conv(bias = layers_2_fc1_inlier_module_bias_to_fp16, dilations = var_883_dilations_0, groups = var_883_groups_0, pad = var_883_pad_0, pad_type = var_883_pad_type_0, strides = var_883_strides_0, weight = layers_2_fc1_inlier_module_weight_to_fp16_palettized, x = input_25_cast_fp16)[name = string("op_883_cast_fp16")];
+            string var_889_pad_type_0 = const()[name = string("op_889_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_889_strides_0 = const()[name = string("op_889_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_889_pad_0 = const()[name = string("op_889_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_889_dilations_0 = const()[name = string("op_889_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_889_groups_0 = const()[name = string("op_889_groups_0"), val = int32(1)];
+            tensor<fp16, [5120, 1280, 1, 1]> layers_2_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [5120, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(173445760))), nonzero_data = tensor<fp16, [80920]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(173283840))))[name = string("layers_2_fc1_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 5120, 1, 1]> var_889_cast_fp16 = conv(dilations = var_889_dilations_0, groups = var_889_groups_0, pad = var_889_pad_0, pad_type = var_889_pad_type_0, strides = var_889_strides_0, weight = layers_2_fc1_outlier_module_weight_to_fp16_sparsified, x = input_25_cast_fp16)[name = string("op_889_cast_fp16")];
+            tensor<fp16, [1, 5120, 1, 1]> input_27_cast_fp16 = add(x = var_883_cast_fp16, y = var_889_cast_fp16)[name = string("input_27_cast_fp16")];
+            string input_29_mode_0 = const()[name = string("input_29_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 5120, 1, 1]> input_29_cast_fp16 = gelu(mode = input_29_mode_0, x = input_27_cast_fp16)[name = string("input_29_cast_fp16")];
+            string var_900_pad_type_0 = const()[name = string("op_900_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_900_strides_0 = const()[name = string("op_900_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_900_pad_0 = const()[name = string("op_900_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_900_dilations_0 = const()[name = string("op_900_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_900_groups_0 = const()[name = string("op_900_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 5120, 1, 1]> layers_2_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 5120, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(174265024))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(177541888))))[name = string("layers_2_fc2_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1280]> layers_2_fc2_inlier_module_bias_to_fp16 = const()[name = string("layers_2_fc2_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(177542016)))];
+            tensor<fp16, [1, 1280, 1, 1]> var_900_cast_fp16 = conv(bias = layers_2_fc2_inlier_module_bias_to_fp16, dilations = var_900_dilations_0, groups = var_900_groups_0, pad = var_900_pad_0, pad_type = var_900_pad_type_0, strides = var_900_strides_0, weight = layers_2_fc2_inlier_module_weight_to_fp16_palettized, x = input_29_cast_fp16)[name = string("op_900_cast_fp16")];
+            string var_906_pad_type_0 = const()[name = string("op_906_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_906_strides_0 = const()[name = string("op_906_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_906_pad_0 = const()[name = string("op_906_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_906_dilations_0 = const()[name = string("op_906_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_906_groups_0 = const()[name = string("op_906_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 5120, 1, 1]> layers_2_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 5120, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(177624832))), nonzero_data = tensor<fp16, [40054]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(177544640))))[name = string("layers_2_fc2_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1]> var_906_cast_fp16 = conv(dilations = var_906_dilations_0, groups = var_906_groups_0, pad = var_906_pad_0, pad_type = var_906_pad_type_0, strides = var_906_strides_0, weight = layers_2_fc2_outlier_module_weight_to_fp16_sparsified, x = input_29_cast_fp16)[name = string("op_906_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1]> hidden_states_7_cast_fp16 = add(x = var_900_cast_fp16, y = var_906_cast_fp16)[name = string("hidden_states_7_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1]> inputs_19_cast_fp16 = add(x = inputs_17_cast_fp16, y = hidden_states_7_cast_fp16)[name = string("inputs_19_cast_fp16")];
+            tensor<int32, [4]> obj_71_begin_0 = const()[name = string("obj_71_begin_0"), val = tensor<int32, [4]>([3, 0, 0, 0])];
+            tensor<int32, [4]> obj_71_end_0 = const()[name = string("obj_71_end_0"), val = tensor<int32, [4]>([4, 1280, 1, 1536])];
+            tensor<bool, [4]> obj_71_end_mask_0 = const()[name = string("obj_71_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<fp16, [1, 1280, 1, 1536]> obj_71_cast_fp16 = slice_by_index(begin = obj_71_begin_0, end = obj_71_end_0, end_mask = obj_71_end_mask_0, x = read_state_2)[name = string("obj_71_cast_fp16")];
+            tensor<int32, [4]> obj_73_begin_0 = const()[name = string("obj_73_begin_0"), val = tensor<int32, [4]>([3, 0, 0, 0])];
+            tensor<int32, [4]> obj_73_end_0 = const()[name = string("obj_73_end_0"), val = tensor<int32, [4]>([4, 1280, 1, 1536])];
+            tensor<bool, [4]> obj_73_end_mask_0 = const()[name = string("obj_73_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<fp16, [1, 1280, 1, 1536]> obj_73_cast_fp16 = slice_by_index(begin = obj_73_begin_0, end = obj_73_end_0, end_mask = obj_73_end_mask_0, x = read_state_3)[name = string("obj_73_cast_fp16")];
+            int32 var_929 = const()[name = string("op_929"), val = int32(3)];
+            tensor<int32, [1]> out_19_axes_0 = const()[name = string("out_19_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_954_to_fp16 = const()[name = string("op_954_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1]> out_19_cast_fp16 = layer_norm(axes = out_19_axes_0, epsilon = var_954_to_fp16, x = inputs_19_cast_fp16)[name = string("out_19_cast_fp16")];
+            tensor<fp16, [1280]> obj_61_gamma_0_to_fp16 = const()[name = string("obj_61_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(178444096)))];
+            tensor<fp16, [1280]> obj_61_beta_0_to_fp16 = const()[name = string("obj_61_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(178446720)))];
+            fp16 obj_61_epsilon_0_to_fp16 = const()[name = string("obj_61_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1]> obj_61_cast_fp16 = batch_norm(beta = obj_61_beta_0_to_fp16, epsilon = obj_61_epsilon_0_to_fp16, gamma = obj_61_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_19_cast_fp16)[name = string("obj_61_cast_fp16")];
+            string var_976_pad_type_0 = const()[name = string("op_976_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_976_strides_0 = const()[name = string("op_976_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_976_pad_0 = const()[name = string("op_976_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_976_dilations_0 = const()[name = string("op_976_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_976_groups_0 = const()[name = string("op_976_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_3_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(178449344))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(179268608))))[name = string("layers_3_self_attn_q_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1280]> layers_3_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_3_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(179268736)))];
+            tensor<fp16, [1, 1280, 1, 1]> var_976_cast_fp16 = conv(bias = layers_3_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_976_dilations_0, groups = var_976_groups_0, pad = var_976_pad_0, pad_type = var_976_pad_type_0, strides = var_976_strides_0, weight = layers_3_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_61_cast_fp16)[name = string("op_976_cast_fp16")];
+            string var_982_pad_type_0 = const()[name = string("op_982_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_982_strides_0 = const()[name = string("op_982_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_982_pad_0 = const()[name = string("op_982_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_982_dilations_0 = const()[name = string("op_982_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_982_groups_0 = const()[name = string("op_982_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_3_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(179292800))), nonzero_data = tensor<fp16, [10664]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(179271360))))[name = string("layers_3_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1]> var_982_cast_fp16 = conv(dilations = var_982_dilations_0, groups = var_982_groups_0, pad = var_982_pad_0, pad_type = var_982_pad_type_0, strides = var_982_strides_0, weight = layers_3_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_61_cast_fp16)[name = string("op_982_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1]> query_13_cast_fp16 = add(x = var_976_cast_fp16, y = var_982_cast_fp16)[name = string("query_13_cast_fp16")];
+            string var_991_pad_type_0 = const()[name = string("op_991_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_991_strides_0 = const()[name = string("op_991_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_991_pad_0 = const()[name = string("op_991_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_991_dilations_0 = const()[name = string("op_991_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_991_groups_0 = const()[name = string("op_991_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_3_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(179497664))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(180316928))))[name = string("layers_3_self_attn_k_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1, 1280, 1, 1]> var_991_cast_fp16 = conv(dilations = var_991_dilations_0, groups = var_991_groups_0, pad = var_991_pad_0, pad_type = var_991_pad_type_0, strides = var_991_strides_0, weight = layers_3_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_61_cast_fp16)[name = string("op_991_cast_fp16")];
+            string var_997_pad_type_0 = const()[name = string("op_997_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_997_strides_0 = const()[name = string("op_997_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_997_pad_0 = const()[name = string("op_997_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_997_dilations_0 = const()[name = string("op_997_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_997_groups_0 = const()[name = string("op_997_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_3_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(180337920))), nonzero_data = tensor<fp16, [10387]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(180317056))))[name = string("layers_3_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1]> var_997_cast_fp16 = conv(dilations = var_997_dilations_0, groups = var_997_groups_0, pad = var_997_pad_0, pad_type = var_997_pad_type_0, strides = var_997_strides_0, weight = layers_3_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_61_cast_fp16)[name = string("op_997_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1]> current_key_cast_fp16 = add(x = var_991_cast_fp16, y = var_997_cast_fp16)[name = string("current_key_cast_fp16")];
+            string var_1007_pad_type_0 = const()[name = string("op_1007_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1007_strides_0 = const()[name = string("op_1007_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1007_pad_0 = const()[name = string("op_1007_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1007_dilations_0 = const()[name = string("op_1007_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1007_groups_0 = const()[name = string("op_1007_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_3_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(180542784))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(181362048))))[name = string("layers_3_self_attn_v_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1280]> layers_3_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_3_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(181362176)))];
+            tensor<fp16, [1, 1280, 1, 1]> var_1007_cast_fp16 = conv(bias = layers_3_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_1007_dilations_0, groups = var_1007_groups_0, pad = var_1007_pad_0, pad_type = var_1007_pad_type_0, strides = var_1007_strides_0, weight = layers_3_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_61_cast_fp16)[name = string("op_1007_cast_fp16")];
+            string var_1013_pad_type_0 = const()[name = string("op_1013_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1013_strides_0 = const()[name = string("op_1013_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1013_pad_0 = const()[name = string("op_1013_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1013_dilations_0 = const()[name = string("op_1013_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1013_groups_0 = const()[name = string("op_1013_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_3_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(181379584))), nonzero_data = tensor<fp16, [7342]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(181364800))))[name = string("layers_3_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1]> var_1013_cast_fp16 = conv(dilations = var_1013_dilations_0, groups = var_1013_groups_0, pad = var_1013_pad_0, pad_type = var_1013_pad_type_0, strides = var_1013_strides_0, weight = layers_3_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_61_cast_fp16)[name = string("op_1013_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1]> current_value_cast_fp16 = add(x = var_1007_cast_fp16, y = var_1013_cast_fp16)[name = string("current_value_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 448]> var_1019_cast_fp16 = mul(x = current_key_cast_fp16, y = var_170_cast_fp16)[name = string("op_1019_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 448]> key_cast_fp16 = add(x = var_55_cast_fp16_3, y = var_1019_cast_fp16)[name = string("key_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 448]> var_1021_cast_fp16 = mul(x = current_value_cast_fp16, y = var_170_cast_fp16)[name = string("op_1021_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 448]> value_cast_fp16 = add(x = var_62_cast_fp16_3, y = var_1021_cast_fp16)[name = string("value_cast_fp16")];
+            tensor<int32, [4]> var_1024 = const()[name = string("op_1024"), val = tensor<int32, [4]>([1, 20, 64, -1])];
+            tensor<fp16, [1, 20, 64, 1]> mh_q_13_cast_fp16 = reshape(shape = var_1024, x = query_13_cast_fp16)[name = string("mh_q_13_cast_fp16")];
+            fp16 var_1026_to_fp16 = const()[name = string("op_1026_to_fp16"), val = fp16(0x1p-3)];
+            tensor<fp16, [1, 20, 64, 1]> var_1027_cast_fp16 = mul(x = mh_q_13_cast_fp16, y = var_1026_to_fp16)[name = string("op_1027_cast_fp16")];
+            tensor<int32, [4]> var_1028 = const()[name = string("op_1028"), val = tensor<int32, [4]>([1, 20, 64, -1])];
+            tensor<fp16, [1, 20, 64, 448]> var_1029_cast_fp16 = reshape(shape = var_1028, x = key_cast_fp16)[name = string("op_1029_cast_fp16")];
+            bool mh_w_25_transpose_x_0 = const()[name = string("mh_w_25_transpose_x_0"), val = bool(true)];
+            bool mh_w_25_transpose_y_0 = const()[name = string("mh_w_25_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, 1, 448]> mh_w_25_cast_fp16 = matmul(transpose_x = mh_w_25_transpose_x_0, transpose_y = mh_w_25_transpose_y_0, x = var_1027_cast_fp16, y = var_1029_cast_fp16)[name = string("mh_w_25_cast_fp16")];
+            tensor<fp16, [1, 20, 1, 448]> mh_w_27_cast_fp16 = add(x = mh_w_25_cast_fp16, y = var_187_cast_fp16)[name = string("mh_w_27_cast_fp16")];
+            tensor<fp16, [1, 20, 1, 448]> var_1037_cast_fp16 = softmax(axis = var_929, x = mh_w_27_cast_fp16)[name = string("op_1037_cast_fp16")];
+            tensor<int32, [4]> var_1038 = const()[name = string("op_1038"), val = tensor<int32, [4]>([1, 20, 64, -1])];
+            tensor<fp16, [1, 20, 64, 448]> var_1039_cast_fp16 = reshape(shape = var_1038, x = value_cast_fp16)[name = string("op_1039_cast_fp16")];
+            bool attn_13_transpose_x_0 = const()[name = string("attn_13_transpose_x_0"), val = bool(false)];
+            bool attn_13_transpose_y_0 = const()[name = string("attn_13_transpose_y_0"), val = bool(true)];
+            tensor<fp16, [1, 20, 64, 1]> attn_13_cast_fp16 = matmul(transpose_x = attn_13_transpose_x_0, transpose_y = attn_13_transpose_y_0, x = var_1039_cast_fp16, y = var_1037_cast_fp16)[name = string("attn_13_cast_fp16")];
+            tensor<int32, [4]> var_1042 = const()[name = string("op_1042"), val = tensor<int32, [4]>([1, 1280, 1, -1])];
+            tensor<fp16, [1, 1280, 1, 1]> input_31_cast_fp16 = reshape(shape = var_1042, x = attn_13_cast_fp16)[name = string("input_31_cast_fp16")];
+            string var_1052_pad_type_0 = const()[name = string("op_1052_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1052_strides_0 = const()[name = string("op_1052_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1052_pad_0 = const()[name = string("op_1052_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1052_dilations_0 = const()[name = string("op_1052_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1052_groups_0 = const()[name = string("op_1052_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_3_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(181584448))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(182403712))))[name = string("layers_3_self_attn_o_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1280]> layers_3_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_3_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(182403840)))];
+            tensor<fp16, [1, 1280, 1, 1]> var_1052_cast_fp16 = conv(bias = layers_3_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_1052_dilations_0, groups = var_1052_groups_0, pad = var_1052_pad_0, pad_type = var_1052_pad_type_0, strides = var_1052_strides_0, weight = layers_3_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_31_cast_fp16)[name = string("op_1052_cast_fp16")];
+            string var_1058_pad_type_0 = const()[name = string("op_1058_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1058_strides_0 = const()[name = string("op_1058_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1058_pad_0 = const()[name = string("op_1058_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1058_dilations_0 = const()[name = string("op_1058_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1058_groups_0 = const()[name = string("op_1058_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_3_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(182420992))), nonzero_data = tensor<fp16, [7219]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(182406464))))[name = string("layers_3_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1]> var_1058_cast_fp16 = conv(dilations = var_1058_dilations_0, groups = var_1058_groups_0, pad = var_1058_pad_0, pad_type = var_1058_pad_type_0, strides = var_1058_strides_0, weight = layers_3_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_31_cast_fp16)[name = string("op_1058_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1]> obj_67_cast_fp16 = add(x = var_1052_cast_fp16, y = var_1058_cast_fp16)[name = string("obj_67_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1]> inputs_21_cast_fp16 = add(x = inputs_19_cast_fp16, y = obj_67_cast_fp16)[name = string("inputs_21_cast_fp16")];
+            tensor<int32, [1]> out_21_axes_0 = const()[name = string("out_21_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_1073_to_fp16 = const()[name = string("op_1073_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1]> out_21_cast_fp16 = layer_norm(axes = out_21_axes_0, epsilon = var_1073_to_fp16, x = inputs_21_cast_fp16)[name = string("out_21_cast_fp16")];
+            tensor<fp16, [1280]> obj_69_gamma_0_to_fp16 = const()[name = string("obj_69_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(182625856)))];
+            tensor<fp16, [1280]> obj_69_beta_0_to_fp16 = const()[name = string("obj_69_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(182628480)))];
+            fp16 obj_69_epsilon_0_to_fp16 = const()[name = string("obj_69_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1]> obj_69_cast_fp16 = batch_norm(beta = obj_69_beta_0_to_fp16, epsilon = obj_69_epsilon_0_to_fp16, gamma = obj_69_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_21_cast_fp16)[name = string("obj_69_cast_fp16")];
+            string var_1093_pad_type_0 = const()[name = string("op_1093_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1093_strides_0 = const()[name = string("op_1093_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1093_pad_0 = const()[name = string("op_1093_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1093_dilations_0 = const()[name = string("op_1093_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1093_groups_0 = const()[name = string("op_1093_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_3_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(182631104))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(183450368))))[name = string("layers_3_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1280]> layers_3_encoder_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_3_encoder_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(183450496)))];
+            tensor<fp16, [1, 1280, 1, 1]> var_1093_cast_fp16 = conv(bias = layers_3_encoder_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_1093_dilations_0, groups = var_1093_groups_0, pad = var_1093_pad_0, pad_type = var_1093_pad_type_0, strides = var_1093_strides_0, weight = layers_3_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_69_cast_fp16)[name = string("op_1093_cast_fp16")];
+            string var_1099_pad_type_0 = const()[name = string("op_1099_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1099_strides_0 = const()[name = string("op_1099_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1099_pad_0 = const()[name = string("op_1099_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1099_dilations_0 = const()[name = string("op_1099_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1099_groups_0 = const()[name = string("op_1099_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_3_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(183468544))), nonzero_data = tensor<fp16, [7675]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(183453120))))[name = string("layers_3_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1]> var_1099_cast_fp16 = conv(dilations = var_1099_dilations_0, groups = var_1099_groups_0, pad = var_1099_pad_0, pad_type = var_1099_pad_type_0, strides = var_1099_strides_0, weight = layers_3_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_69_cast_fp16)[name = string("op_1099_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1]> query_cast_fp16 = add(x = var_1093_cast_fp16, y = var_1099_cast_fp16)[name = string("query_cast_fp16")];
+            tensor<int32, [4]> var_1102 = const()[name = string("op_1102"), val = tensor<int32, [4]>([1, 20, 64, -1])];
+            tensor<fp16, [1, 20, 64, 1]> mh_q_cast_fp16 = reshape(shape = var_1102, x = query_cast_fp16)[name = string("mh_q_cast_fp16")];
+            fp16 var_1104_to_fp16 = const()[name = string("op_1104_to_fp16"), val = fp16(0x1p-3)];
+            tensor<fp16, [1, 20, 64, 1]> var_1105_cast_fp16 = mul(x = mh_q_cast_fp16, y = var_1104_to_fp16)[name = string("op_1105_cast_fp16")];
+            tensor<int32, [4]> var_1106 = const()[name = string("op_1106"), val = tensor<int32, [4]>([1, 20, 64, -1])];
+            tensor<fp16, [1, 20, 64, 1536]> var_1107_cast_fp16 = reshape(shape = var_1106, x = obj_71_cast_fp16)[name = string("op_1107_cast_fp16")];
+            bool mh_w_29_transpose_x_0 = const()[name = string("mh_w_29_transpose_x_0"), val = bool(true)];
+            bool mh_w_29_transpose_y_0 = const()[name = string("mh_w_29_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 20, 1, 1536]> mh_w_29_cast_fp16 = matmul(transpose_x = mh_w_29_transpose_x_0, transpose_y = mh_w_29_transpose_y_0, x = var_1105_cast_fp16, y = var_1107_cast_fp16)[name = string("mh_w_29_cast_fp16")];
+            tensor<fp16, [1, 20, 1, 1536]> mh_w_cast_fp16 = add(x = mh_w_29_cast_fp16, y = var_265_cast_fp16)[name = string("mh_w_cast_fp16")];
+            tensor<fp16, [1, 20, 1, 1536]> obj_77_cast_fp16 = softmax(axis = var_929, x = mh_w_cast_fp16)[name = string("obj_77_cast_fp16")];
+            tensor<int32, [4]> var_1116 = const()[name = string("op_1116"), val = tensor<int32, [4]>([1, 20, 64, -1])];
+            tensor<fp16, [1, 20, 64, 1536]> var_1117_cast_fp16 = reshape(shape = var_1116, x = obj_73_cast_fp16)[name = string("op_1117_cast_fp16")];
+            bool attn_transpose_x_0 = const()[name = string("attn_transpose_x_0"), val = bool(false)];
+            bool attn_transpose_y_0 = const()[name = string("attn_transpose_y_0"), val = bool(true)];
+            tensor<fp16, [1, 20, 64, 1]> attn_cast_fp16 = matmul(transpose_x = attn_transpose_x_0, transpose_y = attn_transpose_y_0, x = var_1117_cast_fp16, y = obj_77_cast_fp16)[name = string("attn_cast_fp16")];
+            tensor<int32, [4]> var_1120 = const()[name = string("op_1120"), val = tensor<int32, [4]>([1, 1280, 1, -1])];
+            tensor<fp16, [1, 1280, 1, 1]> input_33_cast_fp16 = reshape(shape = var_1120, x = attn_cast_fp16)[name = string("input_33_cast_fp16")];
+            string var_1130_pad_type_0 = const()[name = string("op_1130_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1130_strides_0 = const()[name = string("op_1130_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1130_pad_0 = const()[name = string("op_1130_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1130_dilations_0 = const()[name = string("op_1130_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1130_groups_0 = const()[name = string("op_1130_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_3_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(183673408))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(184492672))))[name = string("layers_3_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1280]> layers_3_encoder_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_3_encoder_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(184492800)))];
+            tensor<fp16, [1, 1280, 1, 1]> var_1130_cast_fp16 = conv(bias = layers_3_encoder_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_1130_dilations_0, groups = var_1130_groups_0, pad = var_1130_pad_0, pad_type = var_1130_pad_type_0, strides = var_1130_strides_0, weight = layers_3_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_33_cast_fp16)[name = string("op_1130_cast_fp16")];
+            string var_1136_pad_type_0 = const()[name = string("op_1136_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1136_strides_0 = const()[name = string("op_1136_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1136_pad_0 = const()[name = string("op_1136_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1136_dilations_0 = const()[name = string("op_1136_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1136_groups_0 = const()[name = string("op_1136_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 1280, 1, 1]> layers_3_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(184507136))), nonzero_data = tensor<fp16, [5809]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(184495424))))[name = string("layers_3_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1]> var_1136_cast_fp16 = conv(dilations = var_1136_dilations_0, groups = var_1136_groups_0, pad = var_1136_pad_0, pad_type = var_1136_pad_type_0, strides = var_1136_strides_0, weight = layers_3_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_33_cast_fp16)[name = string("op_1136_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1]> obj_75_cast_fp16 = add(x = var_1130_cast_fp16, y = var_1136_cast_fp16)[name = string("obj_75_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1]> inputs_23_cast_fp16 = add(x = inputs_21_cast_fp16, y = obj_75_cast_fp16)[name = string("inputs_23_cast_fp16")];
+            tensor<int32, [1]> out_23_axes_0 = const()[name = string("out_23_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_1150_to_fp16 = const()[name = string("op_1150_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1]> out_23_cast_fp16 = layer_norm(axes = out_23_axes_0, epsilon = var_1150_to_fp16, x = inputs_23_cast_fp16)[name = string("out_23_cast_fp16")];
+            tensor<fp16, [1280]> input_35_gamma_0_to_fp16 = const()[name = string("input_35_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(184712000)))];
+            tensor<fp16, [1280]> input_35_beta_0_to_fp16 = const()[name = string("input_35_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(184714624)))];
+            fp16 input_35_epsilon_0_to_fp16 = const()[name = string("input_35_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1]> input_35_cast_fp16 = batch_norm(beta = input_35_beta_0_to_fp16, epsilon = input_35_epsilon_0_to_fp16, gamma = input_35_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_23_cast_fp16)[name = string("input_35_cast_fp16")];
+            string var_1168_pad_type_0 = const()[name = string("op_1168_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1168_strides_0 = const()[name = string("op_1168_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1168_pad_0 = const()[name = string("op_1168_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1168_dilations_0 = const()[name = string("op_1168_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1168_groups_0 = const()[name = string("op_1168_groups_0"), val = int32(1)];
+            tensor<fp16, [5120, 1280, 1, 1]> layers_3_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [5120, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(184717248))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(187994112))))[name = string("layers_3_fc1_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [5120]> layers_3_fc1_inlier_module_bias_to_fp16 = const()[name = string("layers_3_fc1_inlier_module_bias_to_fp16"), val = tensor<fp16, [5120]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(187994240)))];
+            tensor<fp16, [1, 5120, 1, 1]> var_1168_cast_fp16 = conv(bias = layers_3_fc1_inlier_module_bias_to_fp16, dilations = var_1168_dilations_0, groups = var_1168_groups_0, pad = var_1168_pad_0, pad_type = var_1168_pad_type_0, strides = var_1168_strides_0, weight = layers_3_fc1_inlier_module_weight_to_fp16_palettized, x = input_35_cast_fp16)[name = string("op_1168_cast_fp16")];
+            string var_1174_pad_type_0 = const()[name = string("op_1174_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1174_strides_0 = const()[name = string("op_1174_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1174_pad_0 = const()[name = string("op_1174_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1174_dilations_0 = const()[name = string("op_1174_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1174_groups_0 = const()[name = string("op_1174_groups_0"), val = int32(1)];
+            tensor<fp16, [5120, 1280, 1, 1]> layers_3_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [5120, 1280, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(188057280))), nonzero_data = tensor<fp16, [26331]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(188004544))))[name = string("layers_3_fc1_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 5120, 1, 1]> var_1174_cast_fp16 = conv(dilations = var_1174_dilations_0, groups = var_1174_groups_0, pad = var_1174_pad_0, pad_type = var_1174_pad_type_0, strides = var_1174_strides_0, weight = layers_3_fc1_outlier_module_weight_to_fp16_sparsified, x = input_35_cast_fp16)[name = string("op_1174_cast_fp16")];
+            tensor<fp16, [1, 5120, 1, 1]> input_37_cast_fp16 = add(x = var_1168_cast_fp16, y = var_1174_cast_fp16)[name = string("input_37_cast_fp16")];
+            string input_mode_0 = const()[name = string("input_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 5120, 1, 1]> input_cast_fp16 = gelu(mode = input_mode_0, x = input_37_cast_fp16)[name = string("input_cast_fp16")];
+            string var_1185_pad_type_0 = const()[name = string("op_1185_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1185_strides_0 = const()[name = string("op_1185_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1185_pad_0 = const()[name = string("op_1185_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1185_dilations_0 = const()[name = string("op_1185_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1185_groups_0 = const()[name = string("op_1185_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 5120, 1, 1]> layers_3_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [1280, 5120, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(188876544))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(192153408))))[name = string("layers_3_fc2_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1280]> layers_3_fc2_inlier_module_bias_to_fp16 = const()[name = string("layers_3_fc2_inlier_module_bias_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(192153536)))];
+            tensor<fp16, [1, 1280, 1, 1]> var_1185_cast_fp16 = conv(bias = layers_3_fc2_inlier_module_bias_to_fp16, dilations = var_1185_dilations_0, groups = var_1185_groups_0, pad = var_1185_pad_0, pad_type = var_1185_pad_type_0, strides = var_1185_strides_0, weight = layers_3_fc2_inlier_module_weight_to_fp16_palettized, x = input_cast_fp16)[name = string("op_1185_cast_fp16")];
+            string var_1191_pad_type_0 = const()[name = string("op_1191_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1191_strides_0 = const()[name = string("op_1191_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1191_pad_0 = const()[name = string("op_1191_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1191_dilations_0 = const()[name = string("op_1191_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1191_groups_0 = const()[name = string("op_1191_groups_0"), val = int32(1)];
+            tensor<fp16, [1280, 5120, 1, 1]> layers_3_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [1280, 5120, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(192226688))), nonzero_data = tensor<fp16, [35232]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(192156160))))[name = string("layers_3_fc2_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 1280, 1, 1]> var_1191_cast_fp16 = conv(dilations = var_1191_dilations_0, groups = var_1191_groups_0, pad = var_1191_pad_0, pad_type = var_1191_pad_type_0, strides = var_1191_strides_0, weight = layers_3_fc2_outlier_module_weight_to_fp16_sparsified, x = input_cast_fp16)[name = string("op_1191_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1]> hidden_states_9_cast_fp16 = add(x = var_1185_cast_fp16, y = var_1191_cast_fp16)[name = string("hidden_states_9_cast_fp16")];
+            tensor<fp16, [1, 1280, 1, 1]> inputs_cast_fp16 = add(x = inputs_23_cast_fp16, y = hidden_states_9_cast_fp16)[name = string("inputs_cast_fp16")];
+            tensor<int32, [1]> out_axes_0 = const()[name = string("out_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_1211_to_fp16 = const()[name = string("op_1211_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1]> out_cast_fp16 = layer_norm(axes = out_axes_0, epsilon = var_1211_to_fp16, x = inputs_cast_fp16)[name = string("out_cast_fp16")];
+            tensor<fp16, [1280]> hidden_states_gamma_0_to_fp16 = const()[name = string("hidden_states_gamma_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(193045952)))];
+            tensor<fp16, [1280]> hidden_states_beta_0_to_fp16 = const()[name = string("hidden_states_beta_0_to_fp16"), val = tensor<fp16, [1280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(193048576)))];
+            fp16 hidden_states_epsilon_0_to_fp16 = const()[name = string("hidden_states_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 1280, 1, 1]> hidden_states_cast_fp16 = batch_norm(beta = hidden_states_beta_0_to_fp16, epsilon = hidden_states_epsilon_0_to_fp16, gamma = hidden_states_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_cast_fp16)[name = string("hidden_states_cast_fp16")];
+            tensor<int32, [1]> var_1222_axes_0 = const()[name = string("op_1222_axes_0"), val = tensor<int32, [1]>([2])];
+            tensor<fp16, [1, 1280, 1]> var_1222_cast_fp16 = squeeze(axes = var_1222_axes_0, x = hidden_states_cast_fp16)[name = string("op_1222_cast_fp16")];
+            tensor<int32, [3]> var_1225_perm_0 = const()[name = string("op_1225_perm_0"), val = tensor<int32, [3]>([0, 2, 1])];
+            tensor<fp16, [51866]> linear_0_bias_0_to_fp16 = const()[name = string("linear_0_bias_0_to_fp16"), val = tensor<fp16, [51866]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(193051200)))];
+            tensor<fp16, [1, 1, 1280]> var_1225_cast_fp16 = transpose(perm = var_1225_perm_0, x = var_1222_cast_fp16)[name = string("transpose_0")];
+            tensor<fp16, [1, 1, 51866]> logits = linear(bias = linear_0_bias_0_to_fp16, weight = embed_tokens_weight_to_fp16, x = var_1225_cast_fp16)[name = string("linear_0_cast_fp16")];
+            int32 var_1229 = const()[name = string("op_1229"), val = int32(1)];
+            bool obj_81_interleave_0 = const()[name = string("obj_81_interleave_0"), val = bool(false)];
+            tensor<fp16, [1, 5120, 1, 1]> key_cache_updates = concat(axis = var_1229, interleave = obj_81_interleave_0, values = (current_key_1_cast_fp16, current_key_3_cast_fp16, current_key_5_cast_fp16, current_key_cast_fp16))[name = string("obj_81_cast_fp16")];
+            int32 var_1232 = const()[name = string("op_1232"), val = int32(1)];
+            bool obj_83_interleave_0 = const()[name = string("obj_83_interleave_0"), val = bool(false)];
+            tensor<fp16, [1, 5120, 1, 1]> value_cache_updates = concat(axis = var_1232, interleave = obj_83_interleave_0, values = (current_value_1_cast_fp16, current_value_3_cast_fp16, current_value_5_cast_fp16, current_value_cast_fp16))[name = string("obj_83_cast_fp16")];
+            tensor<int32, [4]> var_1243_begin_0 = const()[name = string("op_1243_begin_0"), val = tensor<int32, [4]>([0, 4, 0, 0])];
+            tensor<int32, [4]> var_1243_end_0 = const()[name = string("op_1243_end_0"), val = tensor<int32, [4]>([1, 5, 1, 1536])];
+            tensor<bool, [4]> var_1243_end_mask_0 = const()[name = string("op_1243_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 1, 1, 1536]> var_1243_cast_fp16 = slice_by_index(begin = var_1243_begin_0, end = var_1243_end_0, end_mask = var_1243_end_mask_0, x = obj_59_cast_fp16)[name = string("op_1243_cast_fp16")];
+            tensor<int32, [4]> var_1246_begin_0 = const()[name = string("op_1246_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_1246_end_0 = const()[name = string("op_1246_end_0"), val = tensor<int32, [4]>([1, 1, 1, 1536])];
+            tensor<bool, [4]> var_1246_end_mask_0 = const()[name = string("op_1246_end_mask_0"), val = tensor<bool, [4]>([true, true, false, true])];
+            tensor<bool, [4]> var_1246_squeeze_mask_0 = const()[name = string("op_1246_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, true, false])];
+            tensor<fp16, [1, 1, 1536]> var_1246_cast_fp16 = slice_by_index(begin = var_1246_begin_0, end = var_1246_end_0, end_mask = var_1246_end_mask_0, squeeze_mask = var_1246_squeeze_mask_0, x = var_1243_cast_fp16)[name = string("op_1246_cast_fp16")];
+            tensor<int32, [4]> var_1261_begin_0 = const()[name = string("op_1261_begin_0"), val = tensor<int32, [4]>([0, 11, 0, 0])];
+            tensor<int32, [4]> var_1261_end_0 = const()[name = string("op_1261_end_0"), val = tensor<int32, [4]>([1, 12, 1, 1536])];
+            tensor<bool, [4]> var_1261_end_mask_0 = const()[name = string("op_1261_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 1, 1, 1536]> var_1261_cast_fp16 = slice_by_index(begin = var_1261_begin_0, end = var_1261_end_0, end_mask = var_1261_end_mask_0, x = obj_59_cast_fp16)[name = string("op_1261_cast_fp16")];
+            tensor<int32, [4]> var_1264_begin_0 = const()[name = string("op_1264_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_1264_end_0 = const()[name = string("op_1264_end_0"), val = tensor<int32, [4]>([1, 1, 1, 1536])];
+            tensor<bool, [4]> var_1264_end_mask_0 = const()[name = string("op_1264_end_mask_0"), val = tensor<bool, [4]>([true, true, false, true])];
+            tensor<bool, [4]> var_1264_squeeze_mask_0 = const()[name = string("op_1264_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, true, false])];
+            tensor<fp16, [1, 1, 1536]> var_1264_cast_fp16 = slice_by_index(begin = var_1264_begin_0, end = var_1264_end_0, end_mask = var_1264_end_mask_0, squeeze_mask = var_1264_squeeze_mask_0, x = var_1261_cast_fp16)[name = string("op_1264_cast_fp16")];
+            tensor<int32, [4]> var_1279_begin_0 = const()[name = string("op_1279_begin_0"), val = tensor<int32, [4]>([0, 3, 0, 0])];
+            tensor<int32, [4]> var_1279_end_0 = const()[name = string("op_1279_end_0"), val = tensor<int32, [4]>([1, 4, 1, 1536])];
+            tensor<bool, [4]> var_1279_end_mask_0 = const()[name = string("op_1279_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 1, 1, 1536]> var_1279_cast_fp16 = slice_by_index(begin = var_1279_begin_0, end = var_1279_end_0, end_mask = var_1279_end_mask_0, x = obj_77_cast_fp16)[name = string("op_1279_cast_fp16")];
+            tensor<int32, [4]> var_1282_begin_0 = const()[name = string("op_1282_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_1282_end_0 = const()[name = string("op_1282_end_0"), val = tensor<int32, [4]>([1, 1, 1, 1536])];
+            tensor<bool, [4]> var_1282_end_mask_0 = const()[name = string("op_1282_end_mask_0"), val = tensor<bool, [4]>([true, true, false, true])];
+            tensor<bool, [4]> var_1282_squeeze_mask_0 = const()[name = string("op_1282_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, true, false])];
+            tensor<fp16, [1, 1, 1536]> var_1282_cast_fp16 = slice_by_index(begin = var_1282_begin_0, end = var_1282_end_0, end_mask = var_1282_end_mask_0, squeeze_mask = var_1282_squeeze_mask_0, x = var_1279_cast_fp16)[name = string("op_1282_cast_fp16")];
+            tensor<int32, [4]> var_1297_begin_0 = const()[name = string("op_1297_begin_0"), val = tensor<int32, [4]>([0, 6, 0, 0])];
+            tensor<int32, [4]> var_1297_end_0 = const()[name = string("op_1297_end_0"), val = tensor<int32, [4]>([1, 7, 1, 1536])];
+            tensor<bool, [4]> var_1297_end_mask_0 = const()[name = string("op_1297_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 1, 1, 1536]> var_1297_cast_fp16 = slice_by_index(begin = var_1297_begin_0, end = var_1297_end_0, end_mask = var_1297_end_mask_0, x = obj_77_cast_fp16)[name = string("op_1297_cast_fp16")];
+            tensor<int32, [4]> var_1300_begin_0 = const()[name = string("op_1300_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_1300_end_0 = const()[name = string("op_1300_end_0"), val = tensor<int32, [4]>([1, 1, 1, 1536])];
+            tensor<bool, [4]> var_1300_end_mask_0 = const()[name = string("op_1300_end_mask_0"), val = tensor<bool, [4]>([true, true, false, true])];
+            tensor<bool, [4]> var_1300_squeeze_mask_0 = const()[name = string("op_1300_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, true, false])];
+            tensor<fp16, [1, 1, 1536]> var_1300_cast_fp16 = slice_by_index(begin = var_1300_begin_0, end = var_1300_end_0, end_mask = var_1300_end_mask_0, squeeze_mask = var_1300_squeeze_mask_0, x = var_1297_cast_fp16)[name = string("op_1300_cast_fp16")];
+            tensor<int32, [4]> var_1315_begin_0 = const()[name = string("op_1315_begin_0"), val = tensor<int32, [4]>([0, 11, 0, 0])];
+            tensor<int32, [4]> var_1315_end_0 = const()[name = string("op_1315_end_0"), val = tensor<int32, [4]>([1, 12, 1, 1536])];
+            tensor<bool, [4]> var_1315_end_mask_0 = const()[name = string("op_1315_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 1, 1, 1536]> var_1315_cast_fp16 = slice_by_index(begin = var_1315_begin_0, end = var_1315_end_0, end_mask = var_1315_end_mask_0, x = obj_77_cast_fp16)[name = string("op_1315_cast_fp16")];
+            tensor<int32, [4]> var_1318_begin_0 = const()[name = string("op_1318_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_1318_end_0 = const()[name = string("op_1318_end_0"), val = tensor<int32, [4]>([1, 1, 1, 1536])];
+            tensor<bool, [4]> var_1318_end_mask_0 = const()[name = string("op_1318_end_mask_0"), val = tensor<bool, [4]>([true, true, false, true])];
+            tensor<bool, [4]> var_1318_squeeze_mask_0 = const()[name = string("op_1318_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, true, false])];
+            tensor<fp16, [1, 1, 1536]> var_1318_cast_fp16 = slice_by_index(begin = var_1318_begin_0, end = var_1318_end_0, end_mask = var_1318_end_mask_0, squeeze_mask = var_1318_squeeze_mask_0, x = var_1315_cast_fp16)[name = string("op_1318_cast_fp16")];
+            tensor<int32, [4]> var_1333_begin_0 = const()[name = string("op_1333_begin_0"), val = tensor<int32, [4]>([0, 14, 0, 0])];
+            tensor<int32, [4]> var_1333_end_0 = const()[name = string("op_1333_end_0"), val = tensor<int32, [4]>([1, 15, 1, 1536])];
+            tensor<bool, [4]> var_1333_end_mask_0 = const()[name = string("op_1333_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 1, 1, 1536]> var_1333_cast_fp16 = slice_by_index(begin = var_1333_begin_0, end = var_1333_end_0, end_mask = var_1333_end_mask_0, x = obj_77_cast_fp16)[name = string("op_1333_cast_fp16")];
+            tensor<int32, [4]> var_1336_begin_0 = const()[name = string("op_1336_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_1336_end_0 = const()[name = string("op_1336_end_0"), val = tensor<int32, [4]>([1, 1, 1, 1536])];
+            tensor<bool, [4]> var_1336_end_mask_0 = const()[name = string("op_1336_end_mask_0"), val = tensor<bool, [4]>([true, true, false, true])];
+            tensor<bool, [4]> var_1336_squeeze_mask_0 = const()[name = string("op_1336_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, true, false])];
+            tensor<fp16, [1, 1, 1536]> var_1336_cast_fp16 = slice_by_index(begin = var_1336_begin_0, end = var_1336_end_0, end_mask = var_1336_end_mask_0, squeeze_mask = var_1336_squeeze_mask_0, x = var_1333_cast_fp16)[name = string("op_1336_cast_fp16")];
+            int32 var_1343 = const()[name = string("op_1343"), val = int32(1)];
+            bool var_1344_interleave_0 = const()[name = string("op_1344_interleave_0"), val = bool(false)];
+            tensor<fp16, [1, 6, 1536]> var_1344_cast_fp16 = concat(axis = var_1343, interleave = var_1344_interleave_0, values = (var_1246_cast_fp16, var_1264_cast_fp16, var_1282_cast_fp16, var_1300_cast_fp16, var_1318_cast_fp16, var_1336_cast_fp16))[name = string("op_1344_cast_fp16")];
+            bool var_1347 = const()[name = string("op_1347"), val = bool(false)];
+            tensor<int32, [1]> obj_axes_0 = const()[name = string("obj_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1536]> alignment_heads_weights = reduce_mean(axes = obj_axes_0, keep_dims = var_1347, x = var_1344_cast_fp16)[name = string("obj_cast_fp16")];
+        } -> (logits, key_cache_updates, value_cache_updates, alignment_heads_weights);
+}
\ No newline at end of file
diff --git a/openai_whisper-large-v3-v20240930_626MB/TextDecoder.mlmodelc/model.mlmodel b/openai_whisper-large-v3-v20240930_626MB/TextDecoder.mlmodelc/model.mlmodel
new file mode 100644
index 0000000000000000000000000000000000000000..f5a91f506674a923e4e9bfe0123f0161af952e6b
--- /dev/null
+++ b/openai_whisper-large-v3-v20240930_626MB/TextDecoder.mlmodelc/model.mlmodel
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:beb44115c7f126e0a9ea1733afa4b3a50609d361c43b14853b89a2da8000d220
+size 163308
diff --git a/openai_whisper-large-v3-v20240930_626MB/TextDecoder.mlmodelc/weights/weight.bin b/openai_whisper-large-v3-v20240930_626MB/TextDecoder.mlmodelc/weights/weight.bin
new file mode 100644
index 0000000000000000000000000000000000000000..be64706bd6740860db63c8126afb7e198dc51192
--- /dev/null
+++ b/openai_whisper-large-v3-v20240930_626MB/TextDecoder.mlmodelc/weights/weight.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:43abea4ea336b9e458398af192796b78180565f3711530c0d8d8d7f192e199ff
+size 193154996
diff --git a/openai_whisper-small.en_217MB/AudioEncoder.mlmodelc/analytics/coremldata.bin b/openai_whisper-small.en_217MB/AudioEncoder.mlmodelc/analytics/coremldata.bin
new file mode 100644
index 0000000000000000000000000000000000000000..a65d3f6dc1d0d161f518cd63f4253aeeef9e0f76
--- /dev/null
+++ b/openai_whisper-small.en_217MB/AudioEncoder.mlmodelc/analytics/coremldata.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:45601c764f7bc51711b42670d55580fa949cf76bbeebc328c60882c048499bf2
+size 243
diff --git a/openai_whisper-small.en_217MB/AudioEncoder.mlmodelc/coremldata.bin b/openai_whisper-small.en_217MB/AudioEncoder.mlmodelc/coremldata.bin
new file mode 100644
index 0000000000000000000000000000000000000000..4e7a2bb9e7ef3870c50986867ffd5a6788e5ed84
--- /dev/null
+++ b/openai_whisper-small.en_217MB/AudioEncoder.mlmodelc/coremldata.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a078e65c9369ce8a4a687a2bbb0a8befbd4ed459250c0442176824906fa95ee1
+size 433
diff --git a/openai_whisper-small.en_217MB/AudioEncoder.mlmodelc/metadata.json b/openai_whisper-small.en_217MB/AudioEncoder.mlmodelc/metadata.json
new file mode 100644
index 0000000000000000000000000000000000000000..631759607c66aadc8a348061f8ecbb631c370929
--- /dev/null
+++ b/openai_whisper-small.en_217MB/AudioEncoder.mlmodelc/metadata.json
@@ -0,0 +1,92 @@
+[
+  {
+    "metadataOutputVersion" : "3.0",
+    "storagePrecision" : "Mixed (Float16, Palettized (4 bits), Sparse)",
+    "outputSchema" : [
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1 × 768 × 1 × 1500)",
+        "shortDescription" : "",
+        "shape" : "[1, 768, 1, 1500]",
+        "name" : "encoder_output_embeds",
+        "type" : "MultiArray"
+      },
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 12 × 768 × 1 × 1536)",
+        "shortDescription" : "",
+        "shape" : "[12, 768, 1, 1536]",
+        "name" : "encoder_attn_key_cache",
+        "type" : "MultiArray"
+      },
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 12 × 768 × 1 × 1536)",
+        "shortDescription" : "",
+        "shape" : "[12, 768, 1, 1536]",
+        "name" : "encoder_attn_value_cache",
+        "type" : "MultiArray"
+      }
+    ],
+    "modelParameters" : [
+
+    ],
+    "specificationVersion" : 9,
+    "mlProgramOperationTypeHistogram" : {
+      "Ios18.constexprLutToDense" : 98,
+      "Ios18.constexprSparseToDense" : 96,
+      "Ios18.conv" : 196,
+      "Ios18.matmul" : 24,
+      "Ios18.batchNorm" : 25,
+      "Pad" : 2,
+      "Ios18.gelu" : 14,
+      "Ios18.concat" : 2,
+      "Ios18.add" : 123,
+      "Ios18.softmax" : 12,
+      "Ios18.layerNorm" : 25,
+      "Ios18.reshape" : 48,
+      "Ios18.mul" : 12
+    },
+    "computePrecision" : "Mixed (Float16, Int32)",
+    "isUpdatable" : "0",
+    "stateSchema" : [
+
+    ],
+    "availability" : {
+      "macOS" : "15.0",
+      "tvOS" : "18.0",
+      "visionOS" : "2.0",
+      "watchOS" : "11.0",
+      "iOS" : "18.0",
+      "macCatalyst" : "18.0"
+    },
+    "modelType" : {
+      "name" : "MLModelType_mlProgram"
+    },
+    "userDefinedMetadata" : {
+      "com.github.apple.coremltools.source_dialect" : "TorchScript",
+      "com.github.apple.coremltools.source" : "torch==2.5.1",
+      "com.github.apple.coremltools.version" : "8.0"
+    },
+    "inputSchema" : [
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1 × 80 × 1 × 3000)",
+        "shortDescription" : "",
+        "shape" : "[1, 80, 1, 3000]",
+        "name" : "melspectrogram_features",
+        "type" : "MultiArray"
+      }
+    ],
+    "generatedClassName" : "AudioEncoderStateful",
+    "method" : "predict"
+  }
+]
\ No newline at end of file
diff --git a/openai_whisper-small.en_217MB/AudioEncoder.mlmodelc/model.mil b/openai_whisper-small.en_217MB/AudioEncoder.mlmodelc/model.mil
new file mode 100644
index 0000000000000000000000000000000000000000..64f9526e8e6c2e90dba45366a802359a6365d08d
--- /dev/null
+++ b/openai_whisper-small.en_217MB/AudioEncoder.mlmodelc/model.mil
@@ -0,0 +1,2011 @@
+program(1.3)
+[buildInfo = dict<string, string>({{"coremlc-component-MIL", "3400.43.1"}, {"coremlc-version", "3400.58.2"}})]
+{
+    func main<ios18>(tensor<fp16, [1, 80, 1, 3000]> melspectrogram_features) {
+            string var_100_pad_type_0 = const()[name = string("op_100_pad_type_0"), val = string("custom")];
+            tensor<int32, [4]> var_100_pad_0 = const()[name = string("op_100_pad_0"), val = tensor<int32, [4]>([0, 0, 1, 1])];
+            tensor<int32, [2]> var_100_strides_0 = const()[name = string("op_100_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_100_dilations_0 = const()[name = string("op_100_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_100_groups_0 = const()[name = string("op_100_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 80, 1, 3]> var_69_to_fp16 = const()[name = string("op_69_to_fp16"), val = tensor<fp16, [768, 80, 1, 3]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64)))];
+            tensor<fp16, [768]> var_81_to_fp16 = const()[name = string("op_81_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(368768)))];
+            tensor<fp16, [1, 768, 1, 3000]> var_100_cast_fp16 = conv(bias = var_81_to_fp16, dilations = var_100_dilations_0, groups = var_100_groups_0, pad = var_100_pad_0, pad_type = var_100_pad_type_0, strides = var_100_strides_0, weight = var_69_to_fp16, x = melspectrogram_features)[name = string("op_100_cast_fp16")];
+            string var_138_pad_type_0 = const()[name = string("op_138_pad_type_0"), val = string("custom")];
+            tensor<int32, [4]> var_138_pad_0 = const()[name = string("op_138_pad_0"), val = tensor<int32, [4]>([0, 0, 1, 1])];
+            tensor<int32, [2]> var_138_strides_0 = const()[name = string("op_138_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_138_dilations_0 = const()[name = string("op_138_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_138_groups_0 = const()[name = string("op_138_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 80, 1, 3]> op_113_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 80, 1, 3]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(370368))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(462592))))[name = string("op_113_to_fp16_palettized")];
+            tensor<fp16, [768]> var_119_to_fp16 = const()[name = string("op_119_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(462720)))];
+            tensor<fp16, [1, 768, 1, 3000]> var_138_cast_fp16 = conv(bias = var_119_to_fp16, dilations = var_138_dilations_0, groups = var_138_groups_0, pad = var_138_pad_0, pad_type = var_138_pad_type_0, strides = var_138_strides_0, weight = op_113_to_fp16_palettized, x = melspectrogram_features)[name = string("op_138_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 3000]> var_140_cast_fp16 = add(x = var_100_cast_fp16, y = var_138_cast_fp16)[name = string("op_140_cast_fp16")];
+            string hidden_states_1_mode_0 = const()[name = string("hidden_states_1_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 768, 1, 3000]> hidden_states_1_cast_fp16 = gelu(mode = hidden_states_1_mode_0, x = var_140_cast_fp16)[name = string("hidden_states_1_cast_fp16")];
+            string var_186_pad_type_0 = const()[name = string("op_186_pad_type_0"), val = string("custom")];
+            tensor<int32, [4]> var_186_pad_0 = const()[name = string("op_186_pad_0"), val = tensor<int32, [4]>([0, 0, 1, 1])];
+            tensor<int32, [2]> var_186_strides_0 = const()[name = string("op_186_strides_0"), val = tensor<int32, [2]>([2, 2])];
+            tensor<int32, [2]> var_186_dilations_0 = const()[name = string("op_186_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_186_groups_0 = const()[name = string("op_186_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 3]> var_155_to_fp16 = const()[name = string("op_155_to_fp16"), val = tensor<fp16, [768, 768, 1, 3]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(464320)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_186_cast_fp16 = conv(bias = var_81_to_fp16, dilations = var_186_dilations_0, groups = var_186_groups_0, pad = var_186_pad_0, pad_type = var_186_pad_type_0, strides = var_186_strides_0, weight = var_155_to_fp16, x = hidden_states_1_cast_fp16)[name = string("op_186_cast_fp16")];
+            string var_224_pad_type_0 = const()[name = string("op_224_pad_type_0"), val = string("custom")];
+            tensor<int32, [4]> var_224_pad_0 = const()[name = string("op_224_pad_0"), val = tensor<int32, [4]>([0, 0, 1, 1])];
+            tensor<int32, [2]> var_224_strides_0 = const()[name = string("op_224_strides_0"), val = tensor<int32, [2]>([2, 2])];
+            tensor<int32, [2]> var_224_dilations_0 = const()[name = string("op_224_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_224_groups_0 = const()[name = string("op_224_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 3]> op_199_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 3]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(4003328))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(4888128))))[name = string("op_199_to_fp16_palettized")];
+            tensor<fp16, [768]> var_205_to_fp16 = const()[name = string("op_205_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(4888256)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_224_cast_fp16 = conv(bias = var_205_to_fp16, dilations = var_224_dilations_0, groups = var_224_groups_0, pad = var_224_pad_0, pad_type = var_224_pad_type_0, strides = var_224_strides_0, weight = op_199_to_fp16_palettized, x = hidden_states_1_cast_fp16)[name = string("op_224_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> var_226_cast_fp16 = add(x = var_186_cast_fp16, y = var_224_cast_fp16)[name = string("op_226_cast_fp16")];
+            string hidden_states_3_mode_0 = const()[name = string("hidden_states_3_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 768, 1, 1500]> hidden_states_3_cast_fp16 = gelu(mode = hidden_states_3_mode_0, x = var_226_cast_fp16)[name = string("hidden_states_3_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> var_246_to_fp16 = const()[name = string("op_246_to_fp16"), val = tensor<fp16, [1, 768, 1, 1500]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(4889856)))];
+            tensor<fp16, [1, 768, 1, 1500]> inputs_1_cast_fp16 = add(x = hidden_states_3_cast_fp16, y = var_246_to_fp16)[name = string("inputs_1_cast_fp16")];
+            int32 var_260 = const()[name = string("op_260"), val = int32(3)];
+            tensor<int32, [1]> out_1_axes_0 = const()[name = string("out_1_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_279_to_fp16 = const()[name = string("op_279_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1500]> out_1_cast_fp16 = layer_norm(axes = out_1_axes_0, epsilon = var_279_to_fp16, x = inputs_1_cast_fp16)[name = string("out_1_cast_fp16")];
+            tensor<fp16, [768]> obj_1_variance_0_to_fp16 = const()[name = string("obj_1_variance_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(7193920)))];
+            tensor<fp16, [768]> obj_1_gamma_0_to_fp16 = const()[name = string("obj_1_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(7195520)))];
+            tensor<fp16, [768]> obj_1_beta_0_to_fp16 = const()[name = string("obj_1_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(7197120)))];
+            fp16 obj_1_epsilon_0_to_fp16 = const()[name = string("obj_1_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1500]> obj_1_cast_fp16 = batch_norm(beta = obj_1_beta_0_to_fp16, epsilon = obj_1_epsilon_0_to_fp16, gamma = obj_1_gamma_0_to_fp16, mean = var_81_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_1_cast_fp16)[name = string("obj_1_cast_fp16")];
+            string var_301_pad_type_0 = const()[name = string("op_301_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_301_strides_0 = const()[name = string("op_301_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_301_pad_0 = const()[name = string("op_301_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_301_dilations_0 = const()[name = string("op_301_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_301_groups_0 = const()[name = string("op_301_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_0_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(7198720))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(7493696))))[name = string("layers_0_self_attn_q_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_0_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_0_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(7493824)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_301_cast_fp16 = conv(bias = layers_0_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_301_dilations_0, groups = var_301_groups_0, pad = var_301_pad_0, pad_type = var_301_pad_type_0, strides = var_301_strides_0, weight = layers_0_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_1_cast_fp16)[name = string("op_301_cast_fp16")];
+            string var_307_pad_type_0 = const()[name = string("op_307_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_307_strides_0 = const()[name = string("op_307_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_307_pad_0 = const()[name = string("op_307_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_307_dilations_0 = const()[name = string("op_307_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_307_groups_0 = const()[name = string("op_307_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_0_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(7518208))), nonzero_data = tensor<fp16, [11338]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(7495424))))[name = string("layers_0_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1500]> var_307_cast_fp16 = conv(dilations = var_307_dilations_0, groups = var_307_groups_0, pad = var_307_pad_0, pad_type = var_307_pad_type_0, strides = var_307_strides_0, weight = layers_0_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_1_cast_fp16)[name = string("op_307_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> query_1_cast_fp16 = add(x = var_301_cast_fp16, y = var_307_cast_fp16)[name = string("query_1_cast_fp16")];
+            string var_316_pad_type_0 = const()[name = string("op_316_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_316_strides_0 = const()[name = string("op_316_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_316_pad_0 = const()[name = string("op_316_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_316_dilations_0 = const()[name = string("op_316_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_316_groups_0 = const()[name = string("op_316_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_0_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(7592000))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(7886976))))[name = string("layers_0_self_attn_k_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1, 768, 1, 1500]> var_316_cast_fp16 = conv(dilations = var_316_dilations_0, groups = var_316_groups_0, pad = var_316_pad_0, pad_type = var_316_pad_type_0, strides = var_316_strides_0, weight = layers_0_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_1_cast_fp16)[name = string("op_316_cast_fp16")];
+            string var_322_pad_type_0 = const()[name = string("op_322_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_322_strides_0 = const()[name = string("op_322_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_322_pad_0 = const()[name = string("op_322_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_322_dilations_0 = const()[name = string("op_322_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_322_groups_0 = const()[name = string("op_322_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_0_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(7908352))), nonzero_data = tensor<fp16, [10583]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(7887104))))[name = string("layers_0_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1500]> var_322_cast_fp16 = conv(dilations = var_322_dilations_0, groups = var_322_groups_0, pad = var_322_pad_0, pad_type = var_322_pad_type_0, strides = var_322_strides_0, weight = layers_0_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_1_cast_fp16)[name = string("op_322_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> key_1_cast_fp16 = add(x = var_316_cast_fp16, y = var_322_cast_fp16)[name = string("key_1_cast_fp16")];
+            string var_332_pad_type_0 = const()[name = string("op_332_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_332_strides_0 = const()[name = string("op_332_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_332_pad_0 = const()[name = string("op_332_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_332_dilations_0 = const()[name = string("op_332_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_332_groups_0 = const()[name = string("op_332_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_0_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(7982144))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(8277120))))[name = string("layers_0_self_attn_v_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_0_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_0_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(8277248)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_332_cast_fp16 = conv(bias = layers_0_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_332_dilations_0, groups = var_332_groups_0, pad = var_332_pad_0, pad_type = var_332_pad_type_0, strides = var_332_strides_0, weight = layers_0_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_1_cast_fp16)[name = string("op_332_cast_fp16")];
+            string var_338_pad_type_0 = const()[name = string("op_338_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_338_strides_0 = const()[name = string("op_338_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_338_pad_0 = const()[name = string("op_338_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_338_dilations_0 = const()[name = string("op_338_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_338_groups_0 = const()[name = string("op_338_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_0_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(8302400))), nonzero_data = tensor<fp16, [11740]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(8278848))))[name = string("layers_0_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1500]> var_338_cast_fp16 = conv(dilations = var_338_dilations_0, groups = var_338_groups_0, pad = var_338_pad_0, pad_type = var_338_pad_type_0, strides = var_338_strides_0, weight = layers_0_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_1_cast_fp16)[name = string("op_338_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> value_1_cast_fp16 = add(x = var_332_cast_fp16, y = var_338_cast_fp16)[name = string("value_1_cast_fp16")];
+            tensor<int32, [4]> var_341 = const()[name = string("op_341"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1500]> mh_q_1_cast_fp16 = reshape(shape = var_341, x = query_1_cast_fp16)[name = string("mh_q_1_cast_fp16")];
+            fp16 var_343_to_fp16 = const()[name = string("op_343_to_fp16"), val = fp16(0x1p-3)];
+            tensor<fp16, [1, 12, 64, 1500]> var_344_cast_fp16 = mul(x = mh_q_1_cast_fp16, y = var_343_to_fp16)[name = string("op_344_cast_fp16")];
+            tensor<int32, [4]> var_345 = const()[name = string("op_345"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1500]> var_346_cast_fp16 = reshape(shape = var_345, x = key_1_cast_fp16)[name = string("op_346_cast_fp16")];
+            bool mh_w_1_transpose_x_0 = const()[name = string("mh_w_1_transpose_x_0"), val = bool(true)];
+            bool mh_w_1_transpose_y_0 = const()[name = string("mh_w_1_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 12, 1500, 1500]> mh_w_1_cast_fp16 = matmul(transpose_x = mh_w_1_transpose_x_0, transpose_y = mh_w_1_transpose_y_0, x = var_344_cast_fp16, y = var_346_cast_fp16)[name = string("mh_w_1_cast_fp16")];
+            tensor<fp16, [1, 12, 1500, 1500]> var_349_cast_fp16 = softmax(axis = var_260, x = mh_w_1_cast_fp16)[name = string("op_349_cast_fp16")];
+            tensor<int32, [4]> var_350 = const()[name = string("op_350"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1500]> var_351_cast_fp16 = reshape(shape = var_350, x = value_1_cast_fp16)[name = string("op_351_cast_fp16")];
+            bool attn_1_transpose_x_0 = const()[name = string("attn_1_transpose_x_0"), val = bool(false)];
+            bool attn_1_transpose_y_0 = const()[name = string("attn_1_transpose_y_0"), val = bool(true)];
+            tensor<fp16, [1, 12, 64, 1500]> attn_1_cast_fp16 = matmul(transpose_x = attn_1_transpose_x_0, transpose_y = attn_1_transpose_y_0, x = var_351_cast_fp16, y = var_349_cast_fp16)[name = string("attn_1_cast_fp16")];
+            tensor<int32, [4]> var_354 = const()[name = string("op_354"), val = tensor<int32, [4]>([1, 768, 1, -1])];
+            tensor<fp16, [1, 768, 1, 1500]> input_1_cast_fp16 = reshape(shape = var_354, x = attn_1_cast_fp16)[name = string("input_1_cast_fp16")];
+            string var_364_pad_type_0 = const()[name = string("op_364_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_364_strides_0 = const()[name = string("op_364_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_364_pad_0 = const()[name = string("op_364_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_364_dilations_0 = const()[name = string("op_364_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_364_groups_0 = const()[name = string("op_364_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_0_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(8376192))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(8671168))))[name = string("layers_0_self_attn_o_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_0_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_0_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(8671296)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_364_cast_fp16 = conv(bias = layers_0_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_364_dilations_0, groups = var_364_groups_0, pad = var_364_pad_0, pad_type = var_364_pad_type_0, strides = var_364_strides_0, weight = layers_0_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_1_cast_fp16)[name = string("op_364_cast_fp16")];
+            string var_370_pad_type_0 = const()[name = string("op_370_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_370_strides_0 = const()[name = string("op_370_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_370_pad_0 = const()[name = string("op_370_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_370_dilations_0 = const()[name = string("op_370_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_370_groups_0 = const()[name = string("op_370_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_0_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(8692480))), nonzero_data = tensor<fp16, [9758]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(8672896))))[name = string("layers_0_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1500]> var_370_cast_fp16 = conv(dilations = var_370_dilations_0, groups = var_370_groups_0, pad = var_370_pad_0, pad_type = var_370_pad_type_0, strides = var_370_strides_0, weight = layers_0_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_1_cast_fp16)[name = string("op_370_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> obj_3_cast_fp16 = add(x = var_364_cast_fp16, y = var_370_cast_fp16)[name = string("obj_3_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> inputs_3_cast_fp16 = add(x = inputs_1_cast_fp16, y = obj_3_cast_fp16)[name = string("inputs_3_cast_fp16")];
+            tensor<int32, [1]> out_3_axes_0 = const()[name = string("out_3_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_381_to_fp16 = const()[name = string("op_381_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1500]> out_3_cast_fp16 = layer_norm(axes = out_3_axes_0, epsilon = var_381_to_fp16, x = inputs_3_cast_fp16)[name = string("out_3_cast_fp16")];
+            tensor<fp16, [768]> input_3_gamma_0_to_fp16 = const()[name = string("input_3_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(8766272)))];
+            tensor<fp16, [768]> input_3_beta_0_to_fp16 = const()[name = string("input_3_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(8767872)))];
+            fp16 input_3_epsilon_0_to_fp16 = const()[name = string("input_3_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1500]> input_3_cast_fp16 = batch_norm(beta = input_3_beta_0_to_fp16, epsilon = input_3_epsilon_0_to_fp16, gamma = input_3_gamma_0_to_fp16, mean = var_81_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_3_cast_fp16)[name = string("input_3_cast_fp16")];
+            string var_399_pad_type_0 = const()[name = string("op_399_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_399_strides_0 = const()[name = string("op_399_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_399_pad_0 = const()[name = string("op_399_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_399_dilations_0 = const()[name = string("op_399_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_399_groups_0 = const()[name = string("op_399_groups_0"), val = int32(1)];
+            tensor<fp16, [3072, 768, 1, 1]> layers_0_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [3072, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(8769472))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(9949184))))[name = string("layers_0_fc1_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [3072]> layers_0_fc1_inlier_module_bias_to_fp16 = const()[name = string("layers_0_fc1_inlier_module_bias_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(9949312)))];
+            tensor<fp16, [1, 3072, 1, 1500]> var_399_cast_fp16 = conv(bias = layers_0_fc1_inlier_module_bias_to_fp16, dilations = var_399_dilations_0, groups = var_399_groups_0, pad = var_399_pad_0, pad_type = var_399_pad_type_0, strides = var_399_strides_0, weight = layers_0_fc1_inlier_module_weight_to_fp16_palettized, x = input_3_cast_fp16)[name = string("op_399_cast_fp16")];
+            string var_405_pad_type_0 = const()[name = string("op_405_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_405_strides_0 = const()[name = string("op_405_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_405_pad_0 = const()[name = string("op_405_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_405_dilations_0 = const()[name = string("op_405_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_405_groups_0 = const()[name = string("op_405_groups_0"), val = int32(1)];
+            tensor<fp16, [3072, 768, 1, 1]> layers_0_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [3072, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(10044736))), nonzero_data = tensor<fp16, [44545]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(9955520))))[name = string("layers_0_fc1_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 3072, 1, 1500]> var_405_cast_fp16 = conv(dilations = var_405_dilations_0, groups = var_405_groups_0, pad = var_405_pad_0, pad_type = var_405_pad_type_0, strides = var_405_strides_0, weight = layers_0_fc1_outlier_module_weight_to_fp16_sparsified, x = input_3_cast_fp16)[name = string("op_405_cast_fp16")];
+            tensor<fp16, [1, 3072, 1, 1500]> input_5_cast_fp16 = add(x = var_399_cast_fp16, y = var_405_cast_fp16)[name = string("input_5_cast_fp16")];
+            string input_7_mode_0 = const()[name = string("input_7_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 3072, 1, 1500]> input_7_cast_fp16 = gelu(mode = input_7_mode_0, x = input_5_cast_fp16)[name = string("input_7_cast_fp16")];
+            string var_416_pad_type_0 = const()[name = string("op_416_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_416_strides_0 = const()[name = string("op_416_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_416_pad_0 = const()[name = string("op_416_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_416_dilations_0 = const()[name = string("op_416_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_416_groups_0 = const()[name = string("op_416_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 3072, 1, 1]> layers_0_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(10339712))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(11519424))))[name = string("layers_0_fc2_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_0_fc2_inlier_module_bias_to_fp16 = const()[name = string("layers_0_fc2_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(11519552)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_416_cast_fp16 = conv(bias = layers_0_fc2_inlier_module_bias_to_fp16, dilations = var_416_dilations_0, groups = var_416_groups_0, pad = var_416_pad_0, pad_type = var_416_pad_type_0, strides = var_416_strides_0, weight = layers_0_fc2_inlier_module_weight_to_fp16_palettized, x = input_7_cast_fp16)[name = string("op_416_cast_fp16")];
+            string var_422_pad_type_0 = const()[name = string("op_422_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_422_strides_0 = const()[name = string("op_422_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_422_pad_0 = const()[name = string("op_422_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_422_dilations_0 = const()[name = string("op_422_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_422_groups_0 = const()[name = string("op_422_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 3072, 1, 1]> layers_0_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(11594560))), nonzero_data = tensor<fp16, [36651]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(11521152))))[name = string("layers_0_fc2_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1500]> var_422_cast_fp16 = conv(dilations = var_422_dilations_0, groups = var_422_groups_0, pad = var_422_pad_0, pad_type = var_422_pad_type_0, strides = var_422_strides_0, weight = layers_0_fc2_outlier_module_weight_to_fp16_sparsified, x = input_7_cast_fp16)[name = string("op_422_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> hidden_states_5_cast_fp16 = add(x = var_416_cast_fp16, y = var_422_cast_fp16)[name = string("hidden_states_5_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> inputs_5_cast_fp16 = add(x = inputs_3_cast_fp16, y = hidden_states_5_cast_fp16)[name = string("inputs_5_cast_fp16")];
+            int32 var_432 = const()[name = string("op_432"), val = int32(3)];
+            tensor<int32, [1]> out_5_axes_0 = const()[name = string("out_5_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_451_to_fp16 = const()[name = string("op_451_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1500]> out_5_cast_fp16 = layer_norm(axes = out_5_axes_0, epsilon = var_451_to_fp16, x = inputs_5_cast_fp16)[name = string("out_5_cast_fp16")];
+            tensor<fp16, [768]> obj_5_gamma_0_to_fp16 = const()[name = string("obj_5_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(11889536)))];
+            tensor<fp16, [768]> obj_5_beta_0_to_fp16 = const()[name = string("obj_5_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(11891136)))];
+            fp16 obj_5_epsilon_0_to_fp16 = const()[name = string("obj_5_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1500]> obj_5_cast_fp16 = batch_norm(beta = obj_5_beta_0_to_fp16, epsilon = obj_5_epsilon_0_to_fp16, gamma = obj_5_gamma_0_to_fp16, mean = var_81_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_5_cast_fp16)[name = string("obj_5_cast_fp16")];
+            string var_473_pad_type_0 = const()[name = string("op_473_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_473_strides_0 = const()[name = string("op_473_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_473_pad_0 = const()[name = string("op_473_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_473_dilations_0 = const()[name = string("op_473_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_473_groups_0 = const()[name = string("op_473_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_1_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(11892736))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(12187712))))[name = string("layers_1_self_attn_q_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_1_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_1_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(12187840)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_473_cast_fp16 = conv(bias = layers_1_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_473_dilations_0, groups = var_473_groups_0, pad = var_473_pad_0, pad_type = var_473_pad_type_0, strides = var_473_strides_0, weight = layers_1_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_5_cast_fp16)[name = string("op_473_cast_fp16")];
+            string var_479_pad_type_0 = const()[name = string("op_479_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_479_strides_0 = const()[name = string("op_479_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_479_pad_0 = const()[name = string("op_479_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_479_dilations_0 = const()[name = string("op_479_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_479_groups_0 = const()[name = string("op_479_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_1_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(12206656))), nonzero_data = tensor<fp16, [8573]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(12189440))))[name = string("layers_1_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1500]> var_479_cast_fp16 = conv(dilations = var_479_dilations_0, groups = var_479_groups_0, pad = var_479_pad_0, pad_type = var_479_pad_type_0, strides = var_479_strides_0, weight = layers_1_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_5_cast_fp16)[name = string("op_479_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> query_3_cast_fp16 = add(x = var_473_cast_fp16, y = var_479_cast_fp16)[name = string("query_3_cast_fp16")];
+            string var_488_pad_type_0 = const()[name = string("op_488_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_488_strides_0 = const()[name = string("op_488_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_488_pad_0 = const()[name = string("op_488_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_488_dilations_0 = const()[name = string("op_488_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_488_groups_0 = const()[name = string("op_488_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_1_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(12280448))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(12575424))))[name = string("layers_1_self_attn_k_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1, 768, 1, 1500]> var_488_cast_fp16 = conv(dilations = var_488_dilations_0, groups = var_488_groups_0, pad = var_488_pad_0, pad_type = var_488_pad_type_0, strides = var_488_strides_0, weight = layers_1_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_5_cast_fp16)[name = string("op_488_cast_fp16")];
+            string var_494_pad_type_0 = const()[name = string("op_494_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_494_strides_0 = const()[name = string("op_494_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_494_pad_0 = const()[name = string("op_494_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_494_dilations_0 = const()[name = string("op_494_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_494_groups_0 = const()[name = string("op_494_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_1_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(12591360))), nonzero_data = tensor<fp16, [7864]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(12575552))))[name = string("layers_1_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1500]> var_494_cast_fp16 = conv(dilations = var_494_dilations_0, groups = var_494_groups_0, pad = var_494_pad_0, pad_type = var_494_pad_type_0, strides = var_494_strides_0, weight = layers_1_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_5_cast_fp16)[name = string("op_494_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> key_3_cast_fp16 = add(x = var_488_cast_fp16, y = var_494_cast_fp16)[name = string("key_3_cast_fp16")];
+            string var_504_pad_type_0 = const()[name = string("op_504_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_504_strides_0 = const()[name = string("op_504_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_504_pad_0 = const()[name = string("op_504_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_504_dilations_0 = const()[name = string("op_504_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_504_groups_0 = const()[name = string("op_504_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_1_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(12665152))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(12960128))))[name = string("layers_1_self_attn_v_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_1_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_1_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(12960256)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_504_cast_fp16 = conv(bias = layers_1_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_504_dilations_0, groups = var_504_groups_0, pad = var_504_pad_0, pad_type = var_504_pad_type_0, strides = var_504_strides_0, weight = layers_1_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_5_cast_fp16)[name = string("op_504_cast_fp16")];
+            string var_510_pad_type_0 = const()[name = string("op_510_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_510_strides_0 = const()[name = string("op_510_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_510_pad_0 = const()[name = string("op_510_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_510_dilations_0 = const()[name = string("op_510_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_510_groups_0 = const()[name = string("op_510_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_1_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(12975872))), nonzero_data = tensor<fp16, [6964]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(12961856))))[name = string("layers_1_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1500]> var_510_cast_fp16 = conv(dilations = var_510_dilations_0, groups = var_510_groups_0, pad = var_510_pad_0, pad_type = var_510_pad_type_0, strides = var_510_strides_0, weight = layers_1_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_5_cast_fp16)[name = string("op_510_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> value_3_cast_fp16 = add(x = var_504_cast_fp16, y = var_510_cast_fp16)[name = string("value_3_cast_fp16")];
+            tensor<int32, [4]> var_513 = const()[name = string("op_513"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1500]> mh_q_3_cast_fp16 = reshape(shape = var_513, x = query_3_cast_fp16)[name = string("mh_q_3_cast_fp16")];
+            fp16 var_515_to_fp16 = const()[name = string("op_515_to_fp16"), val = fp16(0x1p-3)];
+            tensor<fp16, [1, 12, 64, 1500]> var_516_cast_fp16 = mul(x = mh_q_3_cast_fp16, y = var_515_to_fp16)[name = string("op_516_cast_fp16")];
+            tensor<int32, [4]> var_517 = const()[name = string("op_517"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1500]> var_518_cast_fp16 = reshape(shape = var_517, x = key_3_cast_fp16)[name = string("op_518_cast_fp16")];
+            bool mh_w_3_transpose_x_0 = const()[name = string("mh_w_3_transpose_x_0"), val = bool(true)];
+            bool mh_w_3_transpose_y_0 = const()[name = string("mh_w_3_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 12, 1500, 1500]> mh_w_3_cast_fp16 = matmul(transpose_x = mh_w_3_transpose_x_0, transpose_y = mh_w_3_transpose_y_0, x = var_516_cast_fp16, y = var_518_cast_fp16)[name = string("mh_w_3_cast_fp16")];
+            tensor<fp16, [1, 12, 1500, 1500]> var_521_cast_fp16 = softmax(axis = var_432, x = mh_w_3_cast_fp16)[name = string("op_521_cast_fp16")];
+            tensor<int32, [4]> var_522 = const()[name = string("op_522"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1500]> var_523_cast_fp16 = reshape(shape = var_522, x = value_3_cast_fp16)[name = string("op_523_cast_fp16")];
+            bool attn_3_transpose_x_0 = const()[name = string("attn_3_transpose_x_0"), val = bool(false)];
+            bool attn_3_transpose_y_0 = const()[name = string("attn_3_transpose_y_0"), val = bool(true)];
+            tensor<fp16, [1, 12, 64, 1500]> attn_3_cast_fp16 = matmul(transpose_x = attn_3_transpose_x_0, transpose_y = attn_3_transpose_y_0, x = var_523_cast_fp16, y = var_521_cast_fp16)[name = string("attn_3_cast_fp16")];
+            tensor<int32, [4]> var_526 = const()[name = string("op_526"), val = tensor<int32, [4]>([1, 768, 1, -1])];
+            tensor<fp16, [1, 768, 1, 1500]> input_9_cast_fp16 = reshape(shape = var_526, x = attn_3_cast_fp16)[name = string("input_9_cast_fp16")];
+            string var_536_pad_type_0 = const()[name = string("op_536_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_536_strides_0 = const()[name = string("op_536_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_536_pad_0 = const()[name = string("op_536_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_536_dilations_0 = const()[name = string("op_536_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_536_groups_0 = const()[name = string("op_536_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_1_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(13049664))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(13344640))))[name = string("layers_1_self_attn_o_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_1_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_1_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(13344768)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_536_cast_fp16 = conv(bias = layers_1_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_536_dilations_0, groups = var_536_groups_0, pad = var_536_pad_0, pad_type = var_536_pad_type_0, strides = var_536_strides_0, weight = layers_1_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_9_cast_fp16)[name = string("op_536_cast_fp16")];
+            string var_542_pad_type_0 = const()[name = string("op_542_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_542_strides_0 = const()[name = string("op_542_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_542_pad_0 = const()[name = string("op_542_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_542_dilations_0 = const()[name = string("op_542_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_542_groups_0 = const()[name = string("op_542_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_1_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(13357056))), nonzero_data = tensor<fp16, [5299]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(13346368))))[name = string("layers_1_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1500]> var_542_cast_fp16 = conv(dilations = var_542_dilations_0, groups = var_542_groups_0, pad = var_542_pad_0, pad_type = var_542_pad_type_0, strides = var_542_strides_0, weight = layers_1_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_9_cast_fp16)[name = string("op_542_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> obj_7_cast_fp16 = add(x = var_536_cast_fp16, y = var_542_cast_fp16)[name = string("obj_7_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> inputs_7_cast_fp16 = add(x = inputs_5_cast_fp16, y = obj_7_cast_fp16)[name = string("inputs_7_cast_fp16")];
+            tensor<int32, [1]> out_7_axes_0 = const()[name = string("out_7_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_553_to_fp16 = const()[name = string("op_553_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1500]> out_7_cast_fp16 = layer_norm(axes = out_7_axes_0, epsilon = var_553_to_fp16, x = inputs_7_cast_fp16)[name = string("out_7_cast_fp16")];
+            tensor<fp16, [768]> input_11_gamma_0_to_fp16 = const()[name = string("input_11_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(13430848)))];
+            tensor<fp16, [768]> input_11_beta_0_to_fp16 = const()[name = string("input_11_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(13432448)))];
+            fp16 input_11_epsilon_0_to_fp16 = const()[name = string("input_11_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1500]> input_11_cast_fp16 = batch_norm(beta = input_11_beta_0_to_fp16, epsilon = input_11_epsilon_0_to_fp16, gamma = input_11_gamma_0_to_fp16, mean = var_81_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_7_cast_fp16)[name = string("input_11_cast_fp16")];
+            string var_571_pad_type_0 = const()[name = string("op_571_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_571_strides_0 = const()[name = string("op_571_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_571_pad_0 = const()[name = string("op_571_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_571_dilations_0 = const()[name = string("op_571_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_571_groups_0 = const()[name = string("op_571_groups_0"), val = int32(1)];
+            tensor<fp16, [3072, 768, 1, 1]> layers_1_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [3072, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(13434048))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(14613760))))[name = string("layers_1_fc1_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [3072]> layers_1_fc1_inlier_module_bias_to_fp16 = const()[name = string("layers_1_fc1_inlier_module_bias_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(14613888)))];
+            tensor<fp16, [1, 3072, 1, 1500]> var_571_cast_fp16 = conv(bias = layers_1_fc1_inlier_module_bias_to_fp16, dilations = var_571_dilations_0, groups = var_571_groups_0, pad = var_571_pad_0, pad_type = var_571_pad_type_0, strides = var_571_strides_0, weight = layers_1_fc1_inlier_module_weight_to_fp16_palettized, x = input_11_cast_fp16)[name = string("op_571_cast_fp16")];
+            string var_577_pad_type_0 = const()[name = string("op_577_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_577_strides_0 = const()[name = string("op_577_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_577_pad_0 = const()[name = string("op_577_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_577_dilations_0 = const()[name = string("op_577_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_577_groups_0 = const()[name = string("op_577_groups_0"), val = int32(1)];
+            tensor<fp16, [3072, 768, 1, 1]> layers_1_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [3072, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(14688448))), nonzero_data = tensor<fp16, [34117]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(14620096))))[name = string("layers_1_fc1_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 3072, 1, 1500]> var_577_cast_fp16 = conv(dilations = var_577_dilations_0, groups = var_577_groups_0, pad = var_577_pad_0, pad_type = var_577_pad_type_0, strides = var_577_strides_0, weight = layers_1_fc1_outlier_module_weight_to_fp16_sparsified, x = input_11_cast_fp16)[name = string("op_577_cast_fp16")];
+            tensor<fp16, [1, 3072, 1, 1500]> input_13_cast_fp16 = add(x = var_571_cast_fp16, y = var_577_cast_fp16)[name = string("input_13_cast_fp16")];
+            string input_15_mode_0 = const()[name = string("input_15_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 3072, 1, 1500]> input_15_cast_fp16 = gelu(mode = input_15_mode_0, x = input_13_cast_fp16)[name = string("input_15_cast_fp16")];
+            string var_588_pad_type_0 = const()[name = string("op_588_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_588_strides_0 = const()[name = string("op_588_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_588_pad_0 = const()[name = string("op_588_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_588_dilations_0 = const()[name = string("op_588_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_588_groups_0 = const()[name = string("op_588_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 3072, 1, 1]> layers_1_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(14983424))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(16163136))))[name = string("layers_1_fc2_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_1_fc2_inlier_module_bias_to_fp16 = const()[name = string("layers_1_fc2_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(16163264)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_588_cast_fp16 = conv(bias = layers_1_fc2_inlier_module_bias_to_fp16, dilations = var_588_dilations_0, groups = var_588_groups_0, pad = var_588_pad_0, pad_type = var_588_pad_type_0, strides = var_588_strides_0, weight = layers_1_fc2_inlier_module_weight_to_fp16_palettized, x = input_15_cast_fp16)[name = string("op_588_cast_fp16")];
+            string var_594_pad_type_0 = const()[name = string("op_594_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_594_strides_0 = const()[name = string("op_594_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_594_pad_0 = const()[name = string("op_594_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_594_dilations_0 = const()[name = string("op_594_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_594_groups_0 = const()[name = string("op_594_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 3072, 1, 1]> layers_1_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(16225152))), nonzero_data = tensor<fp16, [30100]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(16164864))))[name = string("layers_1_fc2_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1500]> var_594_cast_fp16 = conv(dilations = var_594_dilations_0, groups = var_594_groups_0, pad = var_594_pad_0, pad_type = var_594_pad_type_0, strides = var_594_strides_0, weight = layers_1_fc2_outlier_module_weight_to_fp16_sparsified, x = input_15_cast_fp16)[name = string("op_594_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> hidden_states_7_cast_fp16 = add(x = var_588_cast_fp16, y = var_594_cast_fp16)[name = string("hidden_states_7_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> inputs_9_cast_fp16 = add(x = inputs_7_cast_fp16, y = hidden_states_7_cast_fp16)[name = string("inputs_9_cast_fp16")];
+            int32 var_604 = const()[name = string("op_604"), val = int32(3)];
+            tensor<int32, [1]> out_9_axes_0 = const()[name = string("out_9_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_623_to_fp16 = const()[name = string("op_623_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1500]> out_9_cast_fp16 = layer_norm(axes = out_9_axes_0, epsilon = var_623_to_fp16, x = inputs_9_cast_fp16)[name = string("out_9_cast_fp16")];
+            tensor<fp16, [768]> obj_9_gamma_0_to_fp16 = const()[name = string("obj_9_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(16520128)))];
+            tensor<fp16, [768]> obj_9_beta_0_to_fp16 = const()[name = string("obj_9_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(16521728)))];
+            fp16 obj_9_epsilon_0_to_fp16 = const()[name = string("obj_9_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1500]> obj_9_cast_fp16 = batch_norm(beta = obj_9_beta_0_to_fp16, epsilon = obj_9_epsilon_0_to_fp16, gamma = obj_9_gamma_0_to_fp16, mean = var_81_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_9_cast_fp16)[name = string("obj_9_cast_fp16")];
+            string var_645_pad_type_0 = const()[name = string("op_645_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_645_strides_0 = const()[name = string("op_645_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_645_pad_0 = const()[name = string("op_645_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_645_dilations_0 = const()[name = string("op_645_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_645_groups_0 = const()[name = string("op_645_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_2_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(16523328))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(16818304))))[name = string("layers_2_self_attn_q_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_2_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_2_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(16818432)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_645_cast_fp16 = conv(bias = layers_2_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_645_dilations_0, groups = var_645_groups_0, pad = var_645_pad_0, pad_type = var_645_pad_type_0, strides = var_645_strides_0, weight = layers_2_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_9_cast_fp16)[name = string("op_645_cast_fp16")];
+            string var_651_pad_type_0 = const()[name = string("op_651_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_651_strides_0 = const()[name = string("op_651_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_651_pad_0 = const()[name = string("op_651_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_651_dilations_0 = const()[name = string("op_651_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_651_groups_0 = const()[name = string("op_651_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_2_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(16834112))), nonzero_data = tensor<fp16, [6978]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(16820032))))[name = string("layers_2_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1500]> var_651_cast_fp16 = conv(dilations = var_651_dilations_0, groups = var_651_groups_0, pad = var_651_pad_0, pad_type = var_651_pad_type_0, strides = var_651_strides_0, weight = layers_2_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_9_cast_fp16)[name = string("op_651_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> query_5_cast_fp16 = add(x = var_645_cast_fp16, y = var_651_cast_fp16)[name = string("query_5_cast_fp16")];
+            string var_660_pad_type_0 = const()[name = string("op_660_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_660_strides_0 = const()[name = string("op_660_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_660_pad_0 = const()[name = string("op_660_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_660_dilations_0 = const()[name = string("op_660_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_660_groups_0 = const()[name = string("op_660_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_2_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(16907904))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(17202880))))[name = string("layers_2_self_attn_k_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1, 768, 1, 1500]> var_660_cast_fp16 = conv(dilations = var_660_dilations_0, groups = var_660_groups_0, pad = var_660_pad_0, pad_type = var_660_pad_type_0, strides = var_660_strides_0, weight = layers_2_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_9_cast_fp16)[name = string("op_660_cast_fp16")];
+            string var_666_pad_type_0 = const()[name = string("op_666_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_666_strides_0 = const()[name = string("op_666_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_666_pad_0 = const()[name = string("op_666_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_666_dilations_0 = const()[name = string("op_666_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_666_groups_0 = const()[name = string("op_666_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_2_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(17216384))), nonzero_data = tensor<fp16, [6646]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(17203008))))[name = string("layers_2_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1500]> var_666_cast_fp16 = conv(dilations = var_666_dilations_0, groups = var_666_groups_0, pad = var_666_pad_0, pad_type = var_666_pad_type_0, strides = var_666_strides_0, weight = layers_2_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_9_cast_fp16)[name = string("op_666_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> key_5_cast_fp16 = add(x = var_660_cast_fp16, y = var_666_cast_fp16)[name = string("key_5_cast_fp16")];
+            string var_676_pad_type_0 = const()[name = string("op_676_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_676_strides_0 = const()[name = string("op_676_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_676_pad_0 = const()[name = string("op_676_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_676_dilations_0 = const()[name = string("op_676_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_676_groups_0 = const()[name = string("op_676_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_2_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(17290176))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(17585152))))[name = string("layers_2_self_attn_v_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_2_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_2_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(17585280)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_676_cast_fp16 = conv(bias = layers_2_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_676_dilations_0, groups = var_676_groups_0, pad = var_676_pad_0, pad_type = var_676_pad_type_0, strides = var_676_strides_0, weight = layers_2_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_9_cast_fp16)[name = string("op_676_cast_fp16")];
+            string var_682_pad_type_0 = const()[name = string("op_682_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_682_strides_0 = const()[name = string("op_682_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_682_pad_0 = const()[name = string("op_682_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_682_dilations_0 = const()[name = string("op_682_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_682_groups_0 = const()[name = string("op_682_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_2_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(17596992))), nonzero_data = tensor<fp16, [5006]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(17586880))))[name = string("layers_2_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1500]> var_682_cast_fp16 = conv(dilations = var_682_dilations_0, groups = var_682_groups_0, pad = var_682_pad_0, pad_type = var_682_pad_type_0, strides = var_682_strides_0, weight = layers_2_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_9_cast_fp16)[name = string("op_682_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> value_5_cast_fp16 = add(x = var_676_cast_fp16, y = var_682_cast_fp16)[name = string("value_5_cast_fp16")];
+            tensor<int32, [4]> var_685 = const()[name = string("op_685"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1500]> mh_q_5_cast_fp16 = reshape(shape = var_685, x = query_5_cast_fp16)[name = string("mh_q_5_cast_fp16")];
+            fp16 var_687_to_fp16 = const()[name = string("op_687_to_fp16"), val = fp16(0x1p-3)];
+            tensor<fp16, [1, 12, 64, 1500]> var_688_cast_fp16 = mul(x = mh_q_5_cast_fp16, y = var_687_to_fp16)[name = string("op_688_cast_fp16")];
+            tensor<int32, [4]> var_689 = const()[name = string("op_689"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1500]> var_690_cast_fp16 = reshape(shape = var_689, x = key_5_cast_fp16)[name = string("op_690_cast_fp16")];
+            bool mh_w_5_transpose_x_0 = const()[name = string("mh_w_5_transpose_x_0"), val = bool(true)];
+            bool mh_w_5_transpose_y_0 = const()[name = string("mh_w_5_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 12, 1500, 1500]> mh_w_5_cast_fp16 = matmul(transpose_x = mh_w_5_transpose_x_0, transpose_y = mh_w_5_transpose_y_0, x = var_688_cast_fp16, y = var_690_cast_fp16)[name = string("mh_w_5_cast_fp16")];
+            tensor<fp16, [1, 12, 1500, 1500]> var_693_cast_fp16 = softmax(axis = var_604, x = mh_w_5_cast_fp16)[name = string("op_693_cast_fp16")];
+            tensor<int32, [4]> var_694 = const()[name = string("op_694"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1500]> var_695_cast_fp16 = reshape(shape = var_694, x = value_5_cast_fp16)[name = string("op_695_cast_fp16")];
+            bool attn_5_transpose_x_0 = const()[name = string("attn_5_transpose_x_0"), val = bool(false)];
+            bool attn_5_transpose_y_0 = const()[name = string("attn_5_transpose_y_0"), val = bool(true)];
+            tensor<fp16, [1, 12, 64, 1500]> attn_5_cast_fp16 = matmul(transpose_x = attn_5_transpose_x_0, transpose_y = attn_5_transpose_y_0, x = var_695_cast_fp16, y = var_693_cast_fp16)[name = string("attn_5_cast_fp16")];
+            tensor<int32, [4]> var_698 = const()[name = string("op_698"), val = tensor<int32, [4]>([1, 768, 1, -1])];
+            tensor<fp16, [1, 768, 1, 1500]> input_17_cast_fp16 = reshape(shape = var_698, x = attn_5_cast_fp16)[name = string("input_17_cast_fp16")];
+            string var_708_pad_type_0 = const()[name = string("op_708_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_708_strides_0 = const()[name = string("op_708_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_708_pad_0 = const()[name = string("op_708_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_708_dilations_0 = const()[name = string("op_708_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_708_groups_0 = const()[name = string("op_708_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_2_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(17670784))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(17965760))))[name = string("layers_2_self_attn_o_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_2_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_2_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(17965888)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_708_cast_fp16 = conv(bias = layers_2_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_708_dilations_0, groups = var_708_groups_0, pad = var_708_pad_0, pad_type = var_708_pad_type_0, strides = var_708_strides_0, weight = layers_2_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_17_cast_fp16)[name = string("op_708_cast_fp16")];
+            string var_714_pad_type_0 = const()[name = string("op_714_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_714_strides_0 = const()[name = string("op_714_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_714_pad_0 = const()[name = string("op_714_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_714_dilations_0 = const()[name = string("op_714_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_714_groups_0 = const()[name = string("op_714_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_2_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(17976064))), nonzero_data = tensor<fp16, [4255]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(17967488))))[name = string("layers_2_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1500]> var_714_cast_fp16 = conv(dilations = var_714_dilations_0, groups = var_714_groups_0, pad = var_714_pad_0, pad_type = var_714_pad_type_0, strides = var_714_strides_0, weight = layers_2_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_17_cast_fp16)[name = string("op_714_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> obj_11_cast_fp16 = add(x = var_708_cast_fp16, y = var_714_cast_fp16)[name = string("obj_11_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> inputs_11_cast_fp16 = add(x = inputs_9_cast_fp16, y = obj_11_cast_fp16)[name = string("inputs_11_cast_fp16")];
+            tensor<int32, [1]> out_11_axes_0 = const()[name = string("out_11_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_725_to_fp16 = const()[name = string("op_725_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1500]> out_11_cast_fp16 = layer_norm(axes = out_11_axes_0, epsilon = var_725_to_fp16, x = inputs_11_cast_fp16)[name = string("out_11_cast_fp16")];
+            tensor<fp16, [768]> input_19_gamma_0_to_fp16 = const()[name = string("input_19_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(18049856)))];
+            tensor<fp16, [768]> input_19_beta_0_to_fp16 = const()[name = string("input_19_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(18051456)))];
+            fp16 input_19_epsilon_0_to_fp16 = const()[name = string("input_19_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1500]> input_19_cast_fp16 = batch_norm(beta = input_19_beta_0_to_fp16, epsilon = input_19_epsilon_0_to_fp16, gamma = input_19_gamma_0_to_fp16, mean = var_81_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_11_cast_fp16)[name = string("input_19_cast_fp16")];
+            string var_743_pad_type_0 = const()[name = string("op_743_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_743_strides_0 = const()[name = string("op_743_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_743_pad_0 = const()[name = string("op_743_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_743_dilations_0 = const()[name = string("op_743_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_743_groups_0 = const()[name = string("op_743_groups_0"), val = int32(1)];
+            tensor<fp16, [3072, 768, 1, 1]> layers_2_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [3072, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(18053056))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(19232768))))[name = string("layers_2_fc1_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [3072]> layers_2_fc1_inlier_module_bias_to_fp16 = const()[name = string("layers_2_fc1_inlier_module_bias_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(19232896)))];
+            tensor<fp16, [1, 3072, 1, 1500]> var_743_cast_fp16 = conv(bias = layers_2_fc1_inlier_module_bias_to_fp16, dilations = var_743_dilations_0, groups = var_743_groups_0, pad = var_743_pad_0, pad_type = var_743_pad_type_0, strides = var_743_strides_0, weight = layers_2_fc1_inlier_module_weight_to_fp16_palettized, x = input_19_cast_fp16)[name = string("op_743_cast_fp16")];
+            string var_749_pad_type_0 = const()[name = string("op_749_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_749_strides_0 = const()[name = string("op_749_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_749_pad_0 = const()[name = string("op_749_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_749_dilations_0 = const()[name = string("op_749_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_749_groups_0 = const()[name = string("op_749_groups_0"), val = int32(1)];
+            tensor<fp16, [3072, 768, 1, 1]> layers_2_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [3072, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(19298752))), nonzero_data = tensor<fp16, [29780]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(19239104))))[name = string("layers_2_fc1_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 3072, 1, 1500]> var_749_cast_fp16 = conv(dilations = var_749_dilations_0, groups = var_749_groups_0, pad = var_749_pad_0, pad_type = var_749_pad_type_0, strides = var_749_strides_0, weight = layers_2_fc1_outlier_module_weight_to_fp16_sparsified, x = input_19_cast_fp16)[name = string("op_749_cast_fp16")];
+            tensor<fp16, [1, 3072, 1, 1500]> input_21_cast_fp16 = add(x = var_743_cast_fp16, y = var_749_cast_fp16)[name = string("input_21_cast_fp16")];
+            string input_23_mode_0 = const()[name = string("input_23_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 3072, 1, 1500]> input_23_cast_fp16 = gelu(mode = input_23_mode_0, x = input_21_cast_fp16)[name = string("input_23_cast_fp16")];
+            string var_760_pad_type_0 = const()[name = string("op_760_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_760_strides_0 = const()[name = string("op_760_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_760_pad_0 = const()[name = string("op_760_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_760_dilations_0 = const()[name = string("op_760_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_760_groups_0 = const()[name = string("op_760_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 3072, 1, 1]> layers_2_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(19593728))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(20773440))))[name = string("layers_2_fc2_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_2_fc2_inlier_module_bias_to_fp16 = const()[name = string("layers_2_fc2_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(20773568)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_760_cast_fp16 = conv(bias = layers_2_fc2_inlier_module_bias_to_fp16, dilations = var_760_dilations_0, groups = var_760_groups_0, pad = var_760_pad_0, pad_type = var_760_pad_type_0, strides = var_760_strides_0, weight = layers_2_fc2_inlier_module_weight_to_fp16_palettized, x = input_23_cast_fp16)[name = string("op_760_cast_fp16")];
+            string var_766_pad_type_0 = const()[name = string("op_766_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_766_strides_0 = const()[name = string("op_766_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_766_pad_0 = const()[name = string("op_766_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_766_dilations_0 = const()[name = string("op_766_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_766_groups_0 = const()[name = string("op_766_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 3072, 1, 1]> layers_2_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(20834944))), nonzero_data = tensor<fp16, [29841]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(20775168))))[name = string("layers_2_fc2_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1500]> var_766_cast_fp16 = conv(dilations = var_766_dilations_0, groups = var_766_groups_0, pad = var_766_pad_0, pad_type = var_766_pad_type_0, strides = var_766_strides_0, weight = layers_2_fc2_outlier_module_weight_to_fp16_sparsified, x = input_23_cast_fp16)[name = string("op_766_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> hidden_states_9_cast_fp16 = add(x = var_760_cast_fp16, y = var_766_cast_fp16)[name = string("hidden_states_9_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> inputs_13_cast_fp16 = add(x = inputs_11_cast_fp16, y = hidden_states_9_cast_fp16)[name = string("inputs_13_cast_fp16")];
+            int32 var_776 = const()[name = string("op_776"), val = int32(3)];
+            tensor<int32, [1]> out_13_axes_0 = const()[name = string("out_13_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_795_to_fp16 = const()[name = string("op_795_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1500]> out_13_cast_fp16 = layer_norm(axes = out_13_axes_0, epsilon = var_795_to_fp16, x = inputs_13_cast_fp16)[name = string("out_13_cast_fp16")];
+            tensor<fp16, [768]> obj_13_gamma_0_to_fp16 = const()[name = string("obj_13_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(21129920)))];
+            tensor<fp16, [768]> obj_13_beta_0_to_fp16 = const()[name = string("obj_13_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(21131520)))];
+            fp16 obj_13_epsilon_0_to_fp16 = const()[name = string("obj_13_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1500]> obj_13_cast_fp16 = batch_norm(beta = obj_13_beta_0_to_fp16, epsilon = obj_13_epsilon_0_to_fp16, gamma = obj_13_gamma_0_to_fp16, mean = var_81_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_13_cast_fp16)[name = string("obj_13_cast_fp16")];
+            string var_817_pad_type_0 = const()[name = string("op_817_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_817_strides_0 = const()[name = string("op_817_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_817_pad_0 = const()[name = string("op_817_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_817_dilations_0 = const()[name = string("op_817_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_817_groups_0 = const()[name = string("op_817_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_3_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(21133120))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(21428096))))[name = string("layers_3_self_attn_q_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_3_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_3_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(21428224)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_817_cast_fp16 = conv(bias = layers_3_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_817_dilations_0, groups = var_817_groups_0, pad = var_817_pad_0, pad_type = var_817_pad_type_0, strides = var_817_strides_0, weight = layers_3_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_13_cast_fp16)[name = string("op_817_cast_fp16")];
+            string var_823_pad_type_0 = const()[name = string("op_823_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_823_strides_0 = const()[name = string("op_823_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_823_pad_0 = const()[name = string("op_823_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_823_dilations_0 = const()[name = string("op_823_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_823_groups_0 = const()[name = string("op_823_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_3_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(21440384))), nonzero_data = tensor<fp16, [5218]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(21429824))))[name = string("layers_3_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1500]> var_823_cast_fp16 = conv(dilations = var_823_dilations_0, groups = var_823_groups_0, pad = var_823_pad_0, pad_type = var_823_pad_type_0, strides = var_823_strides_0, weight = layers_3_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_13_cast_fp16)[name = string("op_823_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> query_7_cast_fp16 = add(x = var_817_cast_fp16, y = var_823_cast_fp16)[name = string("query_7_cast_fp16")];
+            string var_832_pad_type_0 = const()[name = string("op_832_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_832_strides_0 = const()[name = string("op_832_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_832_pad_0 = const()[name = string("op_832_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_832_dilations_0 = const()[name = string("op_832_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_832_groups_0 = const()[name = string("op_832_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_3_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(21514176))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(21809152))))[name = string("layers_3_self_attn_k_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1, 768, 1, 1500]> var_832_cast_fp16 = conv(dilations = var_832_dilations_0, groups = var_832_groups_0, pad = var_832_pad_0, pad_type = var_832_pad_type_0, strides = var_832_strides_0, weight = layers_3_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_13_cast_fp16)[name = string("op_832_cast_fp16")];
+            string var_838_pad_type_0 = const()[name = string("op_838_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_838_strides_0 = const()[name = string("op_838_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_838_pad_0 = const()[name = string("op_838_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_838_dilations_0 = const()[name = string("op_838_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_838_groups_0 = const()[name = string("op_838_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_3_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(21819776))), nonzero_data = tensor<fp16, [5203]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(21809280))))[name = string("layers_3_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1500]> var_838_cast_fp16 = conv(dilations = var_838_dilations_0, groups = var_838_groups_0, pad = var_838_pad_0, pad_type = var_838_pad_type_0, strides = var_838_strides_0, weight = layers_3_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_13_cast_fp16)[name = string("op_838_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> key_7_cast_fp16 = add(x = var_832_cast_fp16, y = var_838_cast_fp16)[name = string("key_7_cast_fp16")];
+            string var_848_pad_type_0 = const()[name = string("op_848_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_848_strides_0 = const()[name = string("op_848_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_848_pad_0 = const()[name = string("op_848_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_848_dilations_0 = const()[name = string("op_848_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_848_groups_0 = const()[name = string("op_848_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_3_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(21893568))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(22188544))))[name = string("layers_3_self_attn_v_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_3_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_3_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(22188672)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_848_cast_fp16 = conv(bias = layers_3_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_848_dilations_0, groups = var_848_groups_0, pad = var_848_pad_0, pad_type = var_848_pad_type_0, strides = var_848_strides_0, weight = layers_3_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_13_cast_fp16)[name = string("op_848_cast_fp16")];
+            string var_854_pad_type_0 = const()[name = string("op_854_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_854_strides_0 = const()[name = string("op_854_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_854_pad_0 = const()[name = string("op_854_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_854_dilations_0 = const()[name = string("op_854_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_854_groups_0 = const()[name = string("op_854_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_3_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(22198784))), nonzero_data = tensor<fp16, [4222]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(22190272))))[name = string("layers_3_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1500]> var_854_cast_fp16 = conv(dilations = var_854_dilations_0, groups = var_854_groups_0, pad = var_854_pad_0, pad_type = var_854_pad_type_0, strides = var_854_strides_0, weight = layers_3_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_13_cast_fp16)[name = string("op_854_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> value_7_cast_fp16 = add(x = var_848_cast_fp16, y = var_854_cast_fp16)[name = string("value_7_cast_fp16")];
+            tensor<int32, [4]> var_857 = const()[name = string("op_857"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1500]> mh_q_7_cast_fp16 = reshape(shape = var_857, x = query_7_cast_fp16)[name = string("mh_q_7_cast_fp16")];
+            fp16 var_859_to_fp16 = const()[name = string("op_859_to_fp16"), val = fp16(0x1p-3)];
+            tensor<fp16, [1, 12, 64, 1500]> var_860_cast_fp16 = mul(x = mh_q_7_cast_fp16, y = var_859_to_fp16)[name = string("op_860_cast_fp16")];
+            tensor<int32, [4]> var_861 = const()[name = string("op_861"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1500]> var_862_cast_fp16 = reshape(shape = var_861, x = key_7_cast_fp16)[name = string("op_862_cast_fp16")];
+            bool mh_w_7_transpose_x_0 = const()[name = string("mh_w_7_transpose_x_0"), val = bool(true)];
+            bool mh_w_7_transpose_y_0 = const()[name = string("mh_w_7_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 12, 1500, 1500]> mh_w_7_cast_fp16 = matmul(transpose_x = mh_w_7_transpose_x_0, transpose_y = mh_w_7_transpose_y_0, x = var_860_cast_fp16, y = var_862_cast_fp16)[name = string("mh_w_7_cast_fp16")];
+            tensor<fp16, [1, 12, 1500, 1500]> var_865_cast_fp16 = softmax(axis = var_776, x = mh_w_7_cast_fp16)[name = string("op_865_cast_fp16")];
+            tensor<int32, [4]> var_866 = const()[name = string("op_866"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1500]> var_867_cast_fp16 = reshape(shape = var_866, x = value_7_cast_fp16)[name = string("op_867_cast_fp16")];
+            bool attn_7_transpose_x_0 = const()[name = string("attn_7_transpose_x_0"), val = bool(false)];
+            bool attn_7_transpose_y_0 = const()[name = string("attn_7_transpose_y_0"), val = bool(true)];
+            tensor<fp16, [1, 12, 64, 1500]> attn_7_cast_fp16 = matmul(transpose_x = attn_7_transpose_x_0, transpose_y = attn_7_transpose_y_0, x = var_867_cast_fp16, y = var_865_cast_fp16)[name = string("attn_7_cast_fp16")];
+            tensor<int32, [4]> var_870 = const()[name = string("op_870"), val = tensor<int32, [4]>([1, 768, 1, -1])];
+            tensor<fp16, [1, 768, 1, 1500]> input_25_cast_fp16 = reshape(shape = var_870, x = attn_7_cast_fp16)[name = string("input_25_cast_fp16")];
+            string var_880_pad_type_0 = const()[name = string("op_880_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_880_strides_0 = const()[name = string("op_880_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_880_pad_0 = const()[name = string("op_880_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_880_dilations_0 = const()[name = string("op_880_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_880_groups_0 = const()[name = string("op_880_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_3_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(22272576))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(22567552))))[name = string("layers_3_self_attn_o_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_3_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_3_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(22567680)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_880_cast_fp16 = conv(bias = layers_3_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_880_dilations_0, groups = var_880_groups_0, pad = var_880_pad_0, pad_type = var_880_pad_type_0, strides = var_880_strides_0, weight = layers_3_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_25_cast_fp16)[name = string("op_880_cast_fp16")];
+            string var_886_pad_type_0 = const()[name = string("op_886_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_886_strides_0 = const()[name = string("op_886_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_886_pad_0 = const()[name = string("op_886_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_886_dilations_0 = const()[name = string("op_886_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_886_groups_0 = const()[name = string("op_886_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_3_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(22576640))), nonzero_data = tensor<fp16, [3641]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(22569280))))[name = string("layers_3_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1500]> var_886_cast_fp16 = conv(dilations = var_886_dilations_0, groups = var_886_groups_0, pad = var_886_pad_0, pad_type = var_886_pad_type_0, strides = var_886_strides_0, weight = layers_3_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_25_cast_fp16)[name = string("op_886_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> obj_15_cast_fp16 = add(x = var_880_cast_fp16, y = var_886_cast_fp16)[name = string("obj_15_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> inputs_15_cast_fp16 = add(x = inputs_13_cast_fp16, y = obj_15_cast_fp16)[name = string("inputs_15_cast_fp16")];
+            tensor<int32, [1]> out_15_axes_0 = const()[name = string("out_15_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_897_to_fp16 = const()[name = string("op_897_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1500]> out_15_cast_fp16 = layer_norm(axes = out_15_axes_0, epsilon = var_897_to_fp16, x = inputs_15_cast_fp16)[name = string("out_15_cast_fp16")];
+            tensor<fp16, [768]> input_27_gamma_0_to_fp16 = const()[name = string("input_27_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(22650432)))];
+            tensor<fp16, [768]> input_27_beta_0_to_fp16 = const()[name = string("input_27_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(22652032)))];
+            fp16 input_27_epsilon_0_to_fp16 = const()[name = string("input_27_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1500]> input_27_cast_fp16 = batch_norm(beta = input_27_beta_0_to_fp16, epsilon = input_27_epsilon_0_to_fp16, gamma = input_27_gamma_0_to_fp16, mean = var_81_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_15_cast_fp16)[name = string("input_27_cast_fp16")];
+            string var_915_pad_type_0 = const()[name = string("op_915_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_915_strides_0 = const()[name = string("op_915_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_915_pad_0 = const()[name = string("op_915_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_915_dilations_0 = const()[name = string("op_915_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_915_groups_0 = const()[name = string("op_915_groups_0"), val = int32(1)];
+            tensor<fp16, [3072, 768, 1, 1]> layers_3_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [3072, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(22653632))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(23833344))))[name = string("layers_3_fc1_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [3072]> layers_3_fc1_inlier_module_bias_to_fp16 = const()[name = string("layers_3_fc1_inlier_module_bias_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(23833472)))];
+            tensor<fp16, [1, 3072, 1, 1500]> var_915_cast_fp16 = conv(bias = layers_3_fc1_inlier_module_bias_to_fp16, dilations = var_915_dilations_0, groups = var_915_groups_0, pad = var_915_pad_0, pad_type = var_915_pad_type_0, strides = var_915_strides_0, weight = layers_3_fc1_inlier_module_weight_to_fp16_palettized, x = input_27_cast_fp16)[name = string("op_915_cast_fp16")];
+            string var_921_pad_type_0 = const()[name = string("op_921_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_921_strides_0 = const()[name = string("op_921_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_921_pad_0 = const()[name = string("op_921_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_921_dilations_0 = const()[name = string("op_921_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_921_groups_0 = const()[name = string("op_921_groups_0"), val = int32(1)];
+            tensor<fp16, [3072, 768, 1, 1]> layers_3_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [3072, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(23883648))), nonzero_data = tensor<fp16, [21930]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(23839680))))[name = string("layers_3_fc1_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 3072, 1, 1500]> var_921_cast_fp16 = conv(dilations = var_921_dilations_0, groups = var_921_groups_0, pad = var_921_pad_0, pad_type = var_921_pad_type_0, strides = var_921_strides_0, weight = layers_3_fc1_outlier_module_weight_to_fp16_sparsified, x = input_27_cast_fp16)[name = string("op_921_cast_fp16")];
+            tensor<fp16, [1, 3072, 1, 1500]> input_29_cast_fp16 = add(x = var_915_cast_fp16, y = var_921_cast_fp16)[name = string("input_29_cast_fp16")];
+            string input_31_mode_0 = const()[name = string("input_31_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 3072, 1, 1500]> input_31_cast_fp16 = gelu(mode = input_31_mode_0, x = input_29_cast_fp16)[name = string("input_31_cast_fp16")];
+            string var_932_pad_type_0 = const()[name = string("op_932_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_932_strides_0 = const()[name = string("op_932_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_932_pad_0 = const()[name = string("op_932_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_932_dilations_0 = const()[name = string("op_932_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_932_groups_0 = const()[name = string("op_932_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 3072, 1, 1]> layers_3_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(24178624))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(25358336))))[name = string("layers_3_fc2_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_3_fc2_inlier_module_bias_to_fp16 = const()[name = string("layers_3_fc2_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(25358464)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_932_cast_fp16 = conv(bias = layers_3_fc2_inlier_module_bias_to_fp16, dilations = var_932_dilations_0, groups = var_932_groups_0, pad = var_932_pad_0, pad_type = var_932_pad_type_0, strides = var_932_strides_0, weight = layers_3_fc2_inlier_module_weight_to_fp16_palettized, x = input_31_cast_fp16)[name = string("op_932_cast_fp16")];
+            string var_938_pad_type_0 = const()[name = string("op_938_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_938_strides_0 = const()[name = string("op_938_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_938_pad_0 = const()[name = string("op_938_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_938_dilations_0 = const()[name = string("op_938_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_938_groups_0 = const()[name = string("op_938_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 3072, 1, 1]> layers_3_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(25407808))), nonzero_data = tensor<fp16, [23820]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(25360064))))[name = string("layers_3_fc2_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1500]> var_938_cast_fp16 = conv(dilations = var_938_dilations_0, groups = var_938_groups_0, pad = var_938_pad_0, pad_type = var_938_pad_type_0, strides = var_938_strides_0, weight = layers_3_fc2_outlier_module_weight_to_fp16_sparsified, x = input_31_cast_fp16)[name = string("op_938_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> hidden_states_11_cast_fp16 = add(x = var_932_cast_fp16, y = var_938_cast_fp16)[name = string("hidden_states_11_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> inputs_17_cast_fp16 = add(x = inputs_15_cast_fp16, y = hidden_states_11_cast_fp16)[name = string("inputs_17_cast_fp16")];
+            int32 var_948 = const()[name = string("op_948"), val = int32(3)];
+            tensor<int32, [1]> out_17_axes_0 = const()[name = string("out_17_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_967_to_fp16 = const()[name = string("op_967_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1500]> out_17_cast_fp16 = layer_norm(axes = out_17_axes_0, epsilon = var_967_to_fp16, x = inputs_17_cast_fp16)[name = string("out_17_cast_fp16")];
+            tensor<fp16, [768]> obj_17_gamma_0_to_fp16 = const()[name = string("obj_17_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(25702784)))];
+            tensor<fp16, [768]> obj_17_beta_0_to_fp16 = const()[name = string("obj_17_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(25704384)))];
+            fp16 obj_17_epsilon_0_to_fp16 = const()[name = string("obj_17_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1500]> obj_17_cast_fp16 = batch_norm(beta = obj_17_beta_0_to_fp16, epsilon = obj_17_epsilon_0_to_fp16, gamma = obj_17_gamma_0_to_fp16, mean = var_81_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_17_cast_fp16)[name = string("obj_17_cast_fp16")];
+            string var_989_pad_type_0 = const()[name = string("op_989_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_989_strides_0 = const()[name = string("op_989_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_989_pad_0 = const()[name = string("op_989_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_989_dilations_0 = const()[name = string("op_989_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_989_groups_0 = const()[name = string("op_989_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_4_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(25705984))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(26000960))))[name = string("layers_4_self_attn_q_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_4_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_4_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(26001088)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_989_cast_fp16 = conv(bias = layers_4_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_989_dilations_0, groups = var_989_groups_0, pad = var_989_pad_0, pad_type = var_989_pad_type_0, strides = var_989_strides_0, weight = layers_4_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_17_cast_fp16)[name = string("op_989_cast_fp16")];
+            string var_995_pad_type_0 = const()[name = string("op_995_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_995_strides_0 = const()[name = string("op_995_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_995_pad_0 = const()[name = string("op_995_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_995_dilations_0 = const()[name = string("op_995_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_995_groups_0 = const()[name = string("op_995_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_4_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(26012480))), nonzero_data = tensor<fp16, [4848]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(26002688))))[name = string("layers_4_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1500]> var_995_cast_fp16 = conv(dilations = var_995_dilations_0, groups = var_995_groups_0, pad = var_995_pad_0, pad_type = var_995_pad_type_0, strides = var_995_strides_0, weight = layers_4_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_17_cast_fp16)[name = string("op_995_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> query_9_cast_fp16 = add(x = var_989_cast_fp16, y = var_995_cast_fp16)[name = string("query_9_cast_fp16")];
+            string var_1004_pad_type_0 = const()[name = string("op_1004_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1004_strides_0 = const()[name = string("op_1004_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1004_pad_0 = const()[name = string("op_1004_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1004_dilations_0 = const()[name = string("op_1004_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1004_groups_0 = const()[name = string("op_1004_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_4_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(26086272))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(26381248))))[name = string("layers_4_self_attn_k_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1, 768, 1, 1500]> var_1004_cast_fp16 = conv(dilations = var_1004_dilations_0, groups = var_1004_groups_0, pad = var_1004_pad_0, pad_type = var_1004_pad_type_0, strides = var_1004_strides_0, weight = layers_4_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_17_cast_fp16)[name = string("op_1004_cast_fp16")];
+            string var_1010_pad_type_0 = const()[name = string("op_1010_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1010_strides_0 = const()[name = string("op_1010_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1010_pad_0 = const()[name = string("op_1010_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1010_dilations_0 = const()[name = string("op_1010_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1010_groups_0 = const()[name = string("op_1010_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_4_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(26392192))), nonzero_data = tensor<fp16, [5362]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(26381376))))[name = string("layers_4_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1500]> var_1010_cast_fp16 = conv(dilations = var_1010_dilations_0, groups = var_1010_groups_0, pad = var_1010_pad_0, pad_type = var_1010_pad_type_0, strides = var_1010_strides_0, weight = layers_4_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_17_cast_fp16)[name = string("op_1010_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> key_9_cast_fp16 = add(x = var_1004_cast_fp16, y = var_1010_cast_fp16)[name = string("key_9_cast_fp16")];
+            string var_1020_pad_type_0 = const()[name = string("op_1020_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1020_strides_0 = const()[name = string("op_1020_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1020_pad_0 = const()[name = string("op_1020_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1020_dilations_0 = const()[name = string("op_1020_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1020_groups_0 = const()[name = string("op_1020_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_4_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(26465984))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(26760960))))[name = string("layers_4_self_attn_v_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_4_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_4_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(26761088)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_1020_cast_fp16 = conv(bias = layers_4_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_1020_dilations_0, groups = var_1020_groups_0, pad = var_1020_pad_0, pad_type = var_1020_pad_type_0, strides = var_1020_strides_0, weight = layers_4_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_17_cast_fp16)[name = string("op_1020_cast_fp16")];
+            string var_1026_pad_type_0 = const()[name = string("op_1026_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1026_strides_0 = const()[name = string("op_1026_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1026_pad_0 = const()[name = string("op_1026_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1026_dilations_0 = const()[name = string("op_1026_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1026_groups_0 = const()[name = string("op_1026_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_4_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(26769984))), nonzero_data = tensor<fp16, [3606]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(26762688))))[name = string("layers_4_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1500]> var_1026_cast_fp16 = conv(dilations = var_1026_dilations_0, groups = var_1026_groups_0, pad = var_1026_pad_0, pad_type = var_1026_pad_type_0, strides = var_1026_strides_0, weight = layers_4_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_17_cast_fp16)[name = string("op_1026_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> value_9_cast_fp16 = add(x = var_1020_cast_fp16, y = var_1026_cast_fp16)[name = string("value_9_cast_fp16")];
+            tensor<int32, [4]> var_1029 = const()[name = string("op_1029"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1500]> mh_q_9_cast_fp16 = reshape(shape = var_1029, x = query_9_cast_fp16)[name = string("mh_q_9_cast_fp16")];
+            fp16 var_1031_to_fp16 = const()[name = string("op_1031_to_fp16"), val = fp16(0x1p-3)];
+            tensor<fp16, [1, 12, 64, 1500]> var_1032_cast_fp16 = mul(x = mh_q_9_cast_fp16, y = var_1031_to_fp16)[name = string("op_1032_cast_fp16")];
+            tensor<int32, [4]> var_1033 = const()[name = string("op_1033"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1500]> var_1034_cast_fp16 = reshape(shape = var_1033, x = key_9_cast_fp16)[name = string("op_1034_cast_fp16")];
+            bool mh_w_9_transpose_x_0 = const()[name = string("mh_w_9_transpose_x_0"), val = bool(true)];
+            bool mh_w_9_transpose_y_0 = const()[name = string("mh_w_9_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 12, 1500, 1500]> mh_w_9_cast_fp16 = matmul(transpose_x = mh_w_9_transpose_x_0, transpose_y = mh_w_9_transpose_y_0, x = var_1032_cast_fp16, y = var_1034_cast_fp16)[name = string("mh_w_9_cast_fp16")];
+            tensor<fp16, [1, 12, 1500, 1500]> var_1037_cast_fp16 = softmax(axis = var_948, x = mh_w_9_cast_fp16)[name = string("op_1037_cast_fp16")];
+            tensor<int32, [4]> var_1038 = const()[name = string("op_1038"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1500]> var_1039_cast_fp16 = reshape(shape = var_1038, x = value_9_cast_fp16)[name = string("op_1039_cast_fp16")];
+            bool attn_9_transpose_x_0 = const()[name = string("attn_9_transpose_x_0"), val = bool(false)];
+            bool attn_9_transpose_y_0 = const()[name = string("attn_9_transpose_y_0"), val = bool(true)];
+            tensor<fp16, [1, 12, 64, 1500]> attn_9_cast_fp16 = matmul(transpose_x = attn_9_transpose_x_0, transpose_y = attn_9_transpose_y_0, x = var_1039_cast_fp16, y = var_1037_cast_fp16)[name = string("attn_9_cast_fp16")];
+            tensor<int32, [4]> var_1042 = const()[name = string("op_1042"), val = tensor<int32, [4]>([1, 768, 1, -1])];
+            tensor<fp16, [1, 768, 1, 1500]> input_33_cast_fp16 = reshape(shape = var_1042, x = attn_9_cast_fp16)[name = string("input_33_cast_fp16")];
+            string var_1052_pad_type_0 = const()[name = string("op_1052_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1052_strides_0 = const()[name = string("op_1052_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1052_pad_0 = const()[name = string("op_1052_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1052_dilations_0 = const()[name = string("op_1052_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1052_groups_0 = const()[name = string("op_1052_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_4_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(26843776))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(27138752))))[name = string("layers_4_self_attn_o_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_4_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_4_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(27138880)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_1052_cast_fp16 = conv(bias = layers_4_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_1052_dilations_0, groups = var_1052_groups_0, pad = var_1052_pad_0, pad_type = var_1052_pad_type_0, strides = var_1052_strides_0, weight = layers_4_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_33_cast_fp16)[name = string("op_1052_cast_fp16")];
+            string var_1058_pad_type_0 = const()[name = string("op_1058_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1058_strides_0 = const()[name = string("op_1058_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1058_pad_0 = const()[name = string("op_1058_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1058_dilations_0 = const()[name = string("op_1058_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1058_groups_0 = const()[name = string("op_1058_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_4_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(27146816))), nonzero_data = tensor<fp16, [3133]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(27140480))))[name = string("layers_4_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1500]> var_1058_cast_fp16 = conv(dilations = var_1058_dilations_0, groups = var_1058_groups_0, pad = var_1058_pad_0, pad_type = var_1058_pad_type_0, strides = var_1058_strides_0, weight = layers_4_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_33_cast_fp16)[name = string("op_1058_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> obj_19_cast_fp16 = add(x = var_1052_cast_fp16, y = var_1058_cast_fp16)[name = string("obj_19_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> inputs_19_cast_fp16 = add(x = inputs_17_cast_fp16, y = obj_19_cast_fp16)[name = string("inputs_19_cast_fp16")];
+            tensor<int32, [1]> out_19_axes_0 = const()[name = string("out_19_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_1069_to_fp16 = const()[name = string("op_1069_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1500]> out_19_cast_fp16 = layer_norm(axes = out_19_axes_0, epsilon = var_1069_to_fp16, x = inputs_19_cast_fp16)[name = string("out_19_cast_fp16")];
+            tensor<fp16, [768]> input_35_gamma_0_to_fp16 = const()[name = string("input_35_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(27220608)))];
+            tensor<fp16, [768]> input_35_beta_0_to_fp16 = const()[name = string("input_35_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(27222208)))];
+            fp16 input_35_epsilon_0_to_fp16 = const()[name = string("input_35_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1500]> input_35_cast_fp16 = batch_norm(beta = input_35_beta_0_to_fp16, epsilon = input_35_epsilon_0_to_fp16, gamma = input_35_gamma_0_to_fp16, mean = var_81_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_19_cast_fp16)[name = string("input_35_cast_fp16")];
+            string var_1087_pad_type_0 = const()[name = string("op_1087_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1087_strides_0 = const()[name = string("op_1087_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1087_pad_0 = const()[name = string("op_1087_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1087_dilations_0 = const()[name = string("op_1087_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1087_groups_0 = const()[name = string("op_1087_groups_0"), val = int32(1)];
+            tensor<fp16, [3072, 768, 1, 1]> layers_4_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [3072, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(27223808))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(28403520))))[name = string("layers_4_fc1_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [3072]> layers_4_fc1_inlier_module_bias_to_fp16 = const()[name = string("layers_4_fc1_inlier_module_bias_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(28403648)))];
+            tensor<fp16, [1, 3072, 1, 1500]> var_1087_cast_fp16 = conv(bias = layers_4_fc1_inlier_module_bias_to_fp16, dilations = var_1087_dilations_0, groups = var_1087_groups_0, pad = var_1087_pad_0, pad_type = var_1087_pad_type_0, strides = var_1087_strides_0, weight = layers_4_fc1_inlier_module_weight_to_fp16_palettized, x = input_35_cast_fp16)[name = string("op_1087_cast_fp16")];
+            string var_1093_pad_type_0 = const()[name = string("op_1093_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1093_strides_0 = const()[name = string("op_1093_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1093_pad_0 = const()[name = string("op_1093_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1093_dilations_0 = const()[name = string("op_1093_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1093_groups_0 = const()[name = string("op_1093_groups_0"), val = int32(1)];
+            tensor<fp16, [3072, 768, 1, 1]> layers_4_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [3072, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(28452288))), nonzero_data = tensor<fp16, [21163]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(28409856))))[name = string("layers_4_fc1_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 3072, 1, 1500]> var_1093_cast_fp16 = conv(dilations = var_1093_dilations_0, groups = var_1093_groups_0, pad = var_1093_pad_0, pad_type = var_1093_pad_type_0, strides = var_1093_strides_0, weight = layers_4_fc1_outlier_module_weight_to_fp16_sparsified, x = input_35_cast_fp16)[name = string("op_1093_cast_fp16")];
+            tensor<fp16, [1, 3072, 1, 1500]> input_37_cast_fp16 = add(x = var_1087_cast_fp16, y = var_1093_cast_fp16)[name = string("input_37_cast_fp16")];
+            string input_39_mode_0 = const()[name = string("input_39_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 3072, 1, 1500]> input_39_cast_fp16 = gelu(mode = input_39_mode_0, x = input_37_cast_fp16)[name = string("input_39_cast_fp16")];
+            string var_1104_pad_type_0 = const()[name = string("op_1104_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1104_strides_0 = const()[name = string("op_1104_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1104_pad_0 = const()[name = string("op_1104_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1104_dilations_0 = const()[name = string("op_1104_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1104_groups_0 = const()[name = string("op_1104_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 3072, 1, 1]> layers_4_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(28747264))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(29926976))))[name = string("layers_4_fc2_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_4_fc2_inlier_module_bias_to_fp16 = const()[name = string("layers_4_fc2_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(29927104)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_1104_cast_fp16 = conv(bias = layers_4_fc2_inlier_module_bias_to_fp16, dilations = var_1104_dilations_0, groups = var_1104_groups_0, pad = var_1104_pad_0, pad_type = var_1104_pad_type_0, strides = var_1104_strides_0, weight = layers_4_fc2_inlier_module_weight_to_fp16_palettized, x = input_39_cast_fp16)[name = string("op_1104_cast_fp16")];
+            string var_1110_pad_type_0 = const()[name = string("op_1110_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1110_strides_0 = const()[name = string("op_1110_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1110_pad_0 = const()[name = string("op_1110_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1110_dilations_0 = const()[name = string("op_1110_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1110_groups_0 = const()[name = string("op_1110_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 3072, 1, 1]> layers_4_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(29977152))), nonzero_data = tensor<fp16, [24181]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(29928704))))[name = string("layers_4_fc2_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1500]> var_1110_cast_fp16 = conv(dilations = var_1110_dilations_0, groups = var_1110_groups_0, pad = var_1110_pad_0, pad_type = var_1110_pad_type_0, strides = var_1110_strides_0, weight = layers_4_fc2_outlier_module_weight_to_fp16_sparsified, x = input_39_cast_fp16)[name = string("op_1110_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> hidden_states_13_cast_fp16 = add(x = var_1104_cast_fp16, y = var_1110_cast_fp16)[name = string("hidden_states_13_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> inputs_21_cast_fp16 = add(x = inputs_19_cast_fp16, y = hidden_states_13_cast_fp16)[name = string("inputs_21_cast_fp16")];
+            int32 var_1120 = const()[name = string("op_1120"), val = int32(3)];
+            tensor<int32, [1]> out_21_axes_0 = const()[name = string("out_21_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_1139_to_fp16 = const()[name = string("op_1139_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1500]> out_21_cast_fp16 = layer_norm(axes = out_21_axes_0, epsilon = var_1139_to_fp16, x = inputs_21_cast_fp16)[name = string("out_21_cast_fp16")];
+            tensor<fp16, [768]> obj_21_gamma_0_to_fp16 = const()[name = string("obj_21_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(30272128)))];
+            tensor<fp16, [768]> obj_21_beta_0_to_fp16 = const()[name = string("obj_21_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(30273728)))];
+            fp16 obj_21_epsilon_0_to_fp16 = const()[name = string("obj_21_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1500]> obj_21_cast_fp16 = batch_norm(beta = obj_21_beta_0_to_fp16, epsilon = obj_21_epsilon_0_to_fp16, gamma = obj_21_gamma_0_to_fp16, mean = var_81_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_21_cast_fp16)[name = string("obj_21_cast_fp16")];
+            string var_1161_pad_type_0 = const()[name = string("op_1161_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1161_strides_0 = const()[name = string("op_1161_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1161_pad_0 = const()[name = string("op_1161_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1161_dilations_0 = const()[name = string("op_1161_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1161_groups_0 = const()[name = string("op_1161_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_5_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(30275328))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(30570304))))[name = string("layers_5_self_attn_q_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_5_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_5_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(30570432)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_1161_cast_fp16 = conv(bias = layers_5_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_1161_dilations_0, groups = var_1161_groups_0, pad = var_1161_pad_0, pad_type = var_1161_pad_type_0, strides = var_1161_strides_0, weight = layers_5_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_21_cast_fp16)[name = string("op_1161_cast_fp16")];
+            string var_1167_pad_type_0 = const()[name = string("op_1167_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1167_strides_0 = const()[name = string("op_1167_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1167_pad_0 = const()[name = string("op_1167_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1167_dilations_0 = const()[name = string("op_1167_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1167_groups_0 = const()[name = string("op_1167_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_5_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(30579968))), nonzero_data = tensor<fp16, [3910]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(30572032))))[name = string("layers_5_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1500]> var_1167_cast_fp16 = conv(dilations = var_1167_dilations_0, groups = var_1167_groups_0, pad = var_1167_pad_0, pad_type = var_1167_pad_type_0, strides = var_1167_strides_0, weight = layers_5_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_21_cast_fp16)[name = string("op_1167_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> query_11_cast_fp16 = add(x = var_1161_cast_fp16, y = var_1167_cast_fp16)[name = string("query_11_cast_fp16")];
+            string var_1176_pad_type_0 = const()[name = string("op_1176_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1176_strides_0 = const()[name = string("op_1176_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1176_pad_0 = const()[name = string("op_1176_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1176_dilations_0 = const()[name = string("op_1176_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1176_groups_0 = const()[name = string("op_1176_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_5_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(30653760))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(30948736))))[name = string("layers_5_self_attn_k_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1, 768, 1, 1500]> var_1176_cast_fp16 = conv(dilations = var_1176_dilations_0, groups = var_1176_groups_0, pad = var_1176_pad_0, pad_type = var_1176_pad_type_0, strides = var_1176_strides_0, weight = layers_5_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_21_cast_fp16)[name = string("op_1176_cast_fp16")];
+            string var_1182_pad_type_0 = const()[name = string("op_1182_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1182_strides_0 = const()[name = string("op_1182_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1182_pad_0 = const()[name = string("op_1182_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1182_dilations_0 = const()[name = string("op_1182_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1182_groups_0 = const()[name = string("op_1182_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_5_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(30957056))), nonzero_data = tensor<fp16, [4049]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(30948864))))[name = string("layers_5_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1500]> var_1182_cast_fp16 = conv(dilations = var_1182_dilations_0, groups = var_1182_groups_0, pad = var_1182_pad_0, pad_type = var_1182_pad_type_0, strides = var_1182_strides_0, weight = layers_5_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_21_cast_fp16)[name = string("op_1182_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> key_11_cast_fp16 = add(x = var_1176_cast_fp16, y = var_1182_cast_fp16)[name = string("key_11_cast_fp16")];
+            string var_1192_pad_type_0 = const()[name = string("op_1192_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1192_strides_0 = const()[name = string("op_1192_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1192_pad_0 = const()[name = string("op_1192_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1192_dilations_0 = const()[name = string("op_1192_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1192_groups_0 = const()[name = string("op_1192_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_5_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(31030848))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(31325824))))[name = string("layers_5_self_attn_v_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_5_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_5_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(31325952)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_1192_cast_fp16 = conv(bias = layers_5_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_1192_dilations_0, groups = var_1192_groups_0, pad = var_1192_pad_0, pad_type = var_1192_pad_type_0, strides = var_1192_strides_0, weight = layers_5_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_21_cast_fp16)[name = string("op_1192_cast_fp16")];
+            string var_1198_pad_type_0 = const()[name = string("op_1198_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1198_strides_0 = const()[name = string("op_1198_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1198_pad_0 = const()[name = string("op_1198_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1198_dilations_0 = const()[name = string("op_1198_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1198_groups_0 = const()[name = string("op_1198_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_5_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(31334976))), nonzero_data = tensor<fp16, [3661]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(31327552))))[name = string("layers_5_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1500]> var_1198_cast_fp16 = conv(dilations = var_1198_dilations_0, groups = var_1198_groups_0, pad = var_1198_pad_0, pad_type = var_1198_pad_type_0, strides = var_1198_strides_0, weight = layers_5_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_21_cast_fp16)[name = string("op_1198_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> value_11_cast_fp16 = add(x = var_1192_cast_fp16, y = var_1198_cast_fp16)[name = string("value_11_cast_fp16")];
+            tensor<int32, [4]> var_1201 = const()[name = string("op_1201"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1500]> mh_q_11_cast_fp16 = reshape(shape = var_1201, x = query_11_cast_fp16)[name = string("mh_q_11_cast_fp16")];
+            fp16 var_1203_to_fp16 = const()[name = string("op_1203_to_fp16"), val = fp16(0x1p-3)];
+            tensor<fp16, [1, 12, 64, 1500]> var_1204_cast_fp16 = mul(x = mh_q_11_cast_fp16, y = var_1203_to_fp16)[name = string("op_1204_cast_fp16")];
+            tensor<int32, [4]> var_1205 = const()[name = string("op_1205"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1500]> var_1206_cast_fp16 = reshape(shape = var_1205, x = key_11_cast_fp16)[name = string("op_1206_cast_fp16")];
+            bool mh_w_11_transpose_x_0 = const()[name = string("mh_w_11_transpose_x_0"), val = bool(true)];
+            bool mh_w_11_transpose_y_0 = const()[name = string("mh_w_11_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 12, 1500, 1500]> mh_w_11_cast_fp16 = matmul(transpose_x = mh_w_11_transpose_x_0, transpose_y = mh_w_11_transpose_y_0, x = var_1204_cast_fp16, y = var_1206_cast_fp16)[name = string("mh_w_11_cast_fp16")];
+            tensor<fp16, [1, 12, 1500, 1500]> var_1209_cast_fp16 = softmax(axis = var_1120, x = mh_w_11_cast_fp16)[name = string("op_1209_cast_fp16")];
+            tensor<int32, [4]> var_1210 = const()[name = string("op_1210"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1500]> var_1211_cast_fp16 = reshape(shape = var_1210, x = value_11_cast_fp16)[name = string("op_1211_cast_fp16")];
+            bool attn_11_transpose_x_0 = const()[name = string("attn_11_transpose_x_0"), val = bool(false)];
+            bool attn_11_transpose_y_0 = const()[name = string("attn_11_transpose_y_0"), val = bool(true)];
+            tensor<fp16, [1, 12, 64, 1500]> attn_11_cast_fp16 = matmul(transpose_x = attn_11_transpose_x_0, transpose_y = attn_11_transpose_y_0, x = var_1211_cast_fp16, y = var_1209_cast_fp16)[name = string("attn_11_cast_fp16")];
+            tensor<int32, [4]> var_1214 = const()[name = string("op_1214"), val = tensor<int32, [4]>([1, 768, 1, -1])];
+            tensor<fp16, [1, 768, 1, 1500]> input_41_cast_fp16 = reshape(shape = var_1214, x = attn_11_cast_fp16)[name = string("input_41_cast_fp16")];
+            string var_1224_pad_type_0 = const()[name = string("op_1224_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1224_strides_0 = const()[name = string("op_1224_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1224_pad_0 = const()[name = string("op_1224_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1224_dilations_0 = const()[name = string("op_1224_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1224_groups_0 = const()[name = string("op_1224_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_5_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(31408768))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(31703744))))[name = string("layers_5_self_attn_o_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_5_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_5_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(31703872)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_1224_cast_fp16 = conv(bias = layers_5_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_1224_dilations_0, groups = var_1224_groups_0, pad = var_1224_pad_0, pad_type = var_1224_pad_type_0, strides = var_1224_strides_0, weight = layers_5_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_41_cast_fp16)[name = string("op_1224_cast_fp16")];
+            string var_1230_pad_type_0 = const()[name = string("op_1230_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1230_strides_0 = const()[name = string("op_1230_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1230_pad_0 = const()[name = string("op_1230_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1230_dilations_0 = const()[name = string("op_1230_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1230_groups_0 = const()[name = string("op_1230_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_5_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(31713792))), nonzero_data = tensor<fp16, [4128]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(31705472))))[name = string("layers_5_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1500]> var_1230_cast_fp16 = conv(dilations = var_1230_dilations_0, groups = var_1230_groups_0, pad = var_1230_pad_0, pad_type = var_1230_pad_type_0, strides = var_1230_strides_0, weight = layers_5_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_41_cast_fp16)[name = string("op_1230_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> obj_23_cast_fp16 = add(x = var_1224_cast_fp16, y = var_1230_cast_fp16)[name = string("obj_23_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> inputs_23_cast_fp16 = add(x = inputs_21_cast_fp16, y = obj_23_cast_fp16)[name = string("inputs_23_cast_fp16")];
+            tensor<int32, [1]> out_23_axes_0 = const()[name = string("out_23_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_1241_to_fp16 = const()[name = string("op_1241_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1500]> out_23_cast_fp16 = layer_norm(axes = out_23_axes_0, epsilon = var_1241_to_fp16, x = inputs_23_cast_fp16)[name = string("out_23_cast_fp16")];
+            tensor<fp16, [768]> input_43_gamma_0_to_fp16 = const()[name = string("input_43_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(31787584)))];
+            tensor<fp16, [768]> input_43_beta_0_to_fp16 = const()[name = string("input_43_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(31789184)))];
+            fp16 input_43_epsilon_0_to_fp16 = const()[name = string("input_43_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1500]> input_43_cast_fp16 = batch_norm(beta = input_43_beta_0_to_fp16, epsilon = input_43_epsilon_0_to_fp16, gamma = input_43_gamma_0_to_fp16, mean = var_81_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_23_cast_fp16)[name = string("input_43_cast_fp16")];
+            string var_1259_pad_type_0 = const()[name = string("op_1259_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1259_strides_0 = const()[name = string("op_1259_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1259_pad_0 = const()[name = string("op_1259_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1259_dilations_0 = const()[name = string("op_1259_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1259_groups_0 = const()[name = string("op_1259_groups_0"), val = int32(1)];
+            tensor<fp16, [3072, 768, 1, 1]> layers_5_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [3072, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(31790784))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(32970496))))[name = string("layers_5_fc1_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [3072]> layers_5_fc1_inlier_module_bias_to_fp16 = const()[name = string("layers_5_fc1_inlier_module_bias_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(32970624)))];
+            tensor<fp16, [1, 3072, 1, 1500]> var_1259_cast_fp16 = conv(bias = layers_5_fc1_inlier_module_bias_to_fp16, dilations = var_1259_dilations_0, groups = var_1259_groups_0, pad = var_1259_pad_0, pad_type = var_1259_pad_type_0, strides = var_1259_strides_0, weight = layers_5_fc1_inlier_module_weight_to_fp16_palettized, x = input_43_cast_fp16)[name = string("op_1259_cast_fp16")];
+            string var_1265_pad_type_0 = const()[name = string("op_1265_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1265_strides_0 = const()[name = string("op_1265_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1265_pad_0 = const()[name = string("op_1265_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1265_dilations_0 = const()[name = string("op_1265_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1265_groups_0 = const()[name = string("op_1265_groups_0"), val = int32(1)];
+            tensor<fp16, [3072, 768, 1, 1]> layers_5_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [3072, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(33018432))), nonzero_data = tensor<fp16, [20754]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(32976832))))[name = string("layers_5_fc1_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 3072, 1, 1500]> var_1265_cast_fp16 = conv(dilations = var_1265_dilations_0, groups = var_1265_groups_0, pad = var_1265_pad_0, pad_type = var_1265_pad_type_0, strides = var_1265_strides_0, weight = layers_5_fc1_outlier_module_weight_to_fp16_sparsified, x = input_43_cast_fp16)[name = string("op_1265_cast_fp16")];
+            tensor<fp16, [1, 3072, 1, 1500]> input_45_cast_fp16 = add(x = var_1259_cast_fp16, y = var_1265_cast_fp16)[name = string("input_45_cast_fp16")];
+            string input_47_mode_0 = const()[name = string("input_47_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 3072, 1, 1500]> input_47_cast_fp16 = gelu(mode = input_47_mode_0, x = input_45_cast_fp16)[name = string("input_47_cast_fp16")];
+            string var_1276_pad_type_0 = const()[name = string("op_1276_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1276_strides_0 = const()[name = string("op_1276_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1276_pad_0 = const()[name = string("op_1276_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1276_dilations_0 = const()[name = string("op_1276_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1276_groups_0 = const()[name = string("op_1276_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 3072, 1, 1]> layers_5_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(33313408))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(34493120))))[name = string("layers_5_fc2_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_5_fc2_inlier_module_bias_to_fp16 = const()[name = string("layers_5_fc2_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(34493248)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_1276_cast_fp16 = conv(bias = layers_5_fc2_inlier_module_bias_to_fp16, dilations = var_1276_dilations_0, groups = var_1276_groups_0, pad = var_1276_pad_0, pad_type = var_1276_pad_type_0, strides = var_1276_strides_0, weight = layers_5_fc2_inlier_module_weight_to_fp16_palettized, x = input_47_cast_fp16)[name = string("op_1276_cast_fp16")];
+            string var_1282_pad_type_0 = const()[name = string("op_1282_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1282_strides_0 = const()[name = string("op_1282_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1282_pad_0 = const()[name = string("op_1282_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1282_dilations_0 = const()[name = string("op_1282_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1282_groups_0 = const()[name = string("op_1282_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 3072, 1, 1]> layers_5_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(34539520))), nonzero_data = tensor<fp16, [22280]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(34494848))))[name = string("layers_5_fc2_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1500]> var_1282_cast_fp16 = conv(dilations = var_1282_dilations_0, groups = var_1282_groups_0, pad = var_1282_pad_0, pad_type = var_1282_pad_type_0, strides = var_1282_strides_0, weight = layers_5_fc2_outlier_module_weight_to_fp16_sparsified, x = input_47_cast_fp16)[name = string("op_1282_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> hidden_states_15_cast_fp16 = add(x = var_1276_cast_fp16, y = var_1282_cast_fp16)[name = string("hidden_states_15_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> inputs_25_cast_fp16 = add(x = inputs_23_cast_fp16, y = hidden_states_15_cast_fp16)[name = string("inputs_25_cast_fp16")];
+            int32 var_1292 = const()[name = string("op_1292"), val = int32(3)];
+            tensor<int32, [1]> out_25_axes_0 = const()[name = string("out_25_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_1311_to_fp16 = const()[name = string("op_1311_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1500]> out_25_cast_fp16 = layer_norm(axes = out_25_axes_0, epsilon = var_1311_to_fp16, x = inputs_25_cast_fp16)[name = string("out_25_cast_fp16")];
+            tensor<fp16, [768]> obj_25_gamma_0_to_fp16 = const()[name = string("obj_25_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(34834496)))];
+            tensor<fp16, [768]> obj_25_beta_0_to_fp16 = const()[name = string("obj_25_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(34836096)))];
+            fp16 obj_25_epsilon_0_to_fp16 = const()[name = string("obj_25_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1500]> obj_25_cast_fp16 = batch_norm(beta = obj_25_beta_0_to_fp16, epsilon = obj_25_epsilon_0_to_fp16, gamma = obj_25_gamma_0_to_fp16, mean = var_81_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_25_cast_fp16)[name = string("obj_25_cast_fp16")];
+            string var_1333_pad_type_0 = const()[name = string("op_1333_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1333_strides_0 = const()[name = string("op_1333_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1333_pad_0 = const()[name = string("op_1333_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1333_dilations_0 = const()[name = string("op_1333_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1333_groups_0 = const()[name = string("op_1333_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_6_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(34837696))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(35132672))))[name = string("layers_6_self_attn_q_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_6_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_6_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(35132800)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_1333_cast_fp16 = conv(bias = layers_6_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_1333_dilations_0, groups = var_1333_groups_0, pad = var_1333_pad_0, pad_type = var_1333_pad_type_0, strides = var_1333_strides_0, weight = layers_6_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_25_cast_fp16)[name = string("op_1333_cast_fp16")];
+            string var_1339_pad_type_0 = const()[name = string("op_1339_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1339_strides_0 = const()[name = string("op_1339_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1339_pad_0 = const()[name = string("op_1339_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1339_dilations_0 = const()[name = string("op_1339_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1339_groups_0 = const()[name = string("op_1339_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_6_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(35142208))), nonzero_data = tensor<fp16, [3870]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(35134400))))[name = string("layers_6_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1500]> var_1339_cast_fp16 = conv(dilations = var_1339_dilations_0, groups = var_1339_groups_0, pad = var_1339_pad_0, pad_type = var_1339_pad_type_0, strides = var_1339_strides_0, weight = layers_6_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_25_cast_fp16)[name = string("op_1339_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> query_13_cast_fp16 = add(x = var_1333_cast_fp16, y = var_1339_cast_fp16)[name = string("query_13_cast_fp16")];
+            string var_1348_pad_type_0 = const()[name = string("op_1348_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1348_strides_0 = const()[name = string("op_1348_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1348_pad_0 = const()[name = string("op_1348_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1348_dilations_0 = const()[name = string("op_1348_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1348_groups_0 = const()[name = string("op_1348_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_6_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(35216000))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(35510976))))[name = string("layers_6_self_attn_k_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1, 768, 1, 1500]> var_1348_cast_fp16 = conv(dilations = var_1348_dilations_0, groups = var_1348_groups_0, pad = var_1348_pad_0, pad_type = var_1348_pad_type_0, strides = var_1348_strides_0, weight = layers_6_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_25_cast_fp16)[name = string("op_1348_cast_fp16")];
+            string var_1354_pad_type_0 = const()[name = string("op_1354_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1354_strides_0 = const()[name = string("op_1354_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1354_pad_0 = const()[name = string("op_1354_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1354_dilations_0 = const()[name = string("op_1354_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1354_groups_0 = const()[name = string("op_1354_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_6_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(35518848))), nonzero_data = tensor<fp16, [3809]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(35511104))))[name = string("layers_6_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1500]> var_1354_cast_fp16 = conv(dilations = var_1354_dilations_0, groups = var_1354_groups_0, pad = var_1354_pad_0, pad_type = var_1354_pad_type_0, strides = var_1354_strides_0, weight = layers_6_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_25_cast_fp16)[name = string("op_1354_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> key_13_cast_fp16 = add(x = var_1348_cast_fp16, y = var_1354_cast_fp16)[name = string("key_13_cast_fp16")];
+            string var_1364_pad_type_0 = const()[name = string("op_1364_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1364_strides_0 = const()[name = string("op_1364_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1364_pad_0 = const()[name = string("op_1364_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1364_dilations_0 = const()[name = string("op_1364_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1364_groups_0 = const()[name = string("op_1364_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_6_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(35592640))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(35887616))))[name = string("layers_6_self_attn_v_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_6_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_6_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(35887744)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_1364_cast_fp16 = conv(bias = layers_6_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_1364_dilations_0, groups = var_1364_groups_0, pad = var_1364_pad_0, pad_type = var_1364_pad_type_0, strides = var_1364_strides_0, weight = layers_6_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_25_cast_fp16)[name = string("op_1364_cast_fp16")];
+            string var_1370_pad_type_0 = const()[name = string("op_1370_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1370_strides_0 = const()[name = string("op_1370_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1370_pad_0 = const()[name = string("op_1370_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1370_dilations_0 = const()[name = string("op_1370_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1370_groups_0 = const()[name = string("op_1370_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_6_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(35895296))), nonzero_data = tensor<fp16, [2927]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(35889344))))[name = string("layers_6_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1500]> var_1370_cast_fp16 = conv(dilations = var_1370_dilations_0, groups = var_1370_groups_0, pad = var_1370_pad_0, pad_type = var_1370_pad_type_0, strides = var_1370_strides_0, weight = layers_6_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_25_cast_fp16)[name = string("op_1370_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> value_13_cast_fp16 = add(x = var_1364_cast_fp16, y = var_1370_cast_fp16)[name = string("value_13_cast_fp16")];
+            tensor<int32, [4]> var_1373 = const()[name = string("op_1373"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1500]> mh_q_13_cast_fp16 = reshape(shape = var_1373, x = query_13_cast_fp16)[name = string("mh_q_13_cast_fp16")];
+            fp16 var_1375_to_fp16 = const()[name = string("op_1375_to_fp16"), val = fp16(0x1p-3)];
+            tensor<fp16, [1, 12, 64, 1500]> var_1376_cast_fp16 = mul(x = mh_q_13_cast_fp16, y = var_1375_to_fp16)[name = string("op_1376_cast_fp16")];
+            tensor<int32, [4]> var_1377 = const()[name = string("op_1377"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1500]> var_1378_cast_fp16 = reshape(shape = var_1377, x = key_13_cast_fp16)[name = string("op_1378_cast_fp16")];
+            bool mh_w_13_transpose_x_0 = const()[name = string("mh_w_13_transpose_x_0"), val = bool(true)];
+            bool mh_w_13_transpose_y_0 = const()[name = string("mh_w_13_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 12, 1500, 1500]> mh_w_13_cast_fp16 = matmul(transpose_x = mh_w_13_transpose_x_0, transpose_y = mh_w_13_transpose_y_0, x = var_1376_cast_fp16, y = var_1378_cast_fp16)[name = string("mh_w_13_cast_fp16")];
+            tensor<fp16, [1, 12, 1500, 1500]> var_1381_cast_fp16 = softmax(axis = var_1292, x = mh_w_13_cast_fp16)[name = string("op_1381_cast_fp16")];
+            tensor<int32, [4]> var_1382 = const()[name = string("op_1382"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1500]> var_1383_cast_fp16 = reshape(shape = var_1382, x = value_13_cast_fp16)[name = string("op_1383_cast_fp16")];
+            bool attn_13_transpose_x_0 = const()[name = string("attn_13_transpose_x_0"), val = bool(false)];
+            bool attn_13_transpose_y_0 = const()[name = string("attn_13_transpose_y_0"), val = bool(true)];
+            tensor<fp16, [1, 12, 64, 1500]> attn_13_cast_fp16 = matmul(transpose_x = attn_13_transpose_x_0, transpose_y = attn_13_transpose_y_0, x = var_1383_cast_fp16, y = var_1381_cast_fp16)[name = string("attn_13_cast_fp16")];
+            tensor<int32, [4]> var_1386 = const()[name = string("op_1386"), val = tensor<int32, [4]>([1, 768, 1, -1])];
+            tensor<fp16, [1, 768, 1, 1500]> input_49_cast_fp16 = reshape(shape = var_1386, x = attn_13_cast_fp16)[name = string("input_49_cast_fp16")];
+            string var_1396_pad_type_0 = const()[name = string("op_1396_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1396_strides_0 = const()[name = string("op_1396_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1396_pad_0 = const()[name = string("op_1396_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1396_dilations_0 = const()[name = string("op_1396_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1396_groups_0 = const()[name = string("op_1396_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_6_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(35969088))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(36264064))))[name = string("layers_6_self_attn_o_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_6_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_6_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(36264192)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_1396_cast_fp16 = conv(bias = layers_6_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_1396_dilations_0, groups = var_1396_groups_0, pad = var_1396_pad_0, pad_type = var_1396_pad_type_0, strides = var_1396_strides_0, weight = layers_6_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_49_cast_fp16)[name = string("op_1396_cast_fp16")];
+            string var_1402_pad_type_0 = const()[name = string("op_1402_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1402_strides_0 = const()[name = string("op_1402_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1402_pad_0 = const()[name = string("op_1402_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1402_dilations_0 = const()[name = string("op_1402_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1402_groups_0 = const()[name = string("op_1402_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_6_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(36270720))), nonzero_data = tensor<fp16, [2412]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(36265792))))[name = string("layers_6_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1500]> var_1402_cast_fp16 = conv(dilations = var_1402_dilations_0, groups = var_1402_groups_0, pad = var_1402_pad_0, pad_type = var_1402_pad_type_0, strides = var_1402_strides_0, weight = layers_6_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_49_cast_fp16)[name = string("op_1402_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> obj_27_cast_fp16 = add(x = var_1396_cast_fp16, y = var_1402_cast_fp16)[name = string("obj_27_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> inputs_27_cast_fp16 = add(x = inputs_25_cast_fp16, y = obj_27_cast_fp16)[name = string("inputs_27_cast_fp16")];
+            tensor<int32, [1]> out_27_axes_0 = const()[name = string("out_27_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_1413_to_fp16 = const()[name = string("op_1413_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1500]> out_27_cast_fp16 = layer_norm(axes = out_27_axes_0, epsilon = var_1413_to_fp16, x = inputs_27_cast_fp16)[name = string("out_27_cast_fp16")];
+            tensor<fp16, [768]> input_51_gamma_0_to_fp16 = const()[name = string("input_51_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(36344512)))];
+            tensor<fp16, [768]> input_51_beta_0_to_fp16 = const()[name = string("input_51_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(36346112)))];
+            fp16 input_51_epsilon_0_to_fp16 = const()[name = string("input_51_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1500]> input_51_cast_fp16 = batch_norm(beta = input_51_beta_0_to_fp16, epsilon = input_51_epsilon_0_to_fp16, gamma = input_51_gamma_0_to_fp16, mean = var_81_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_27_cast_fp16)[name = string("input_51_cast_fp16")];
+            string var_1431_pad_type_0 = const()[name = string("op_1431_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1431_strides_0 = const()[name = string("op_1431_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1431_pad_0 = const()[name = string("op_1431_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1431_dilations_0 = const()[name = string("op_1431_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1431_groups_0 = const()[name = string("op_1431_groups_0"), val = int32(1)];
+            tensor<fp16, [3072, 768, 1, 1]> layers_6_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [3072, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(36347712))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(37527424))))[name = string("layers_6_fc1_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [3072]> layers_6_fc1_inlier_module_bias_to_fp16 = const()[name = string("layers_6_fc1_inlier_module_bias_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(37527552)))];
+            tensor<fp16, [1, 3072, 1, 1500]> var_1431_cast_fp16 = conv(bias = layers_6_fc1_inlier_module_bias_to_fp16, dilations = var_1431_dilations_0, groups = var_1431_groups_0, pad = var_1431_pad_0, pad_type = var_1431_pad_type_0, strides = var_1431_strides_0, weight = layers_6_fc1_inlier_module_weight_to_fp16_palettized, x = input_51_cast_fp16)[name = string("op_1431_cast_fp16")];
+            string var_1437_pad_type_0 = const()[name = string("op_1437_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1437_strides_0 = const()[name = string("op_1437_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1437_pad_0 = const()[name = string("op_1437_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1437_dilations_0 = const()[name = string("op_1437_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1437_groups_0 = const()[name = string("op_1437_groups_0"), val = int32(1)];
+            tensor<fp16, [3072, 768, 1, 1]> layers_6_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [3072, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(37566848))), nonzero_data = tensor<fp16, [16482]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(37533760))))[name = string("layers_6_fc1_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 3072, 1, 1500]> var_1437_cast_fp16 = conv(dilations = var_1437_dilations_0, groups = var_1437_groups_0, pad = var_1437_pad_0, pad_type = var_1437_pad_type_0, strides = var_1437_strides_0, weight = layers_6_fc1_outlier_module_weight_to_fp16_sparsified, x = input_51_cast_fp16)[name = string("op_1437_cast_fp16")];
+            tensor<fp16, [1, 3072, 1, 1500]> input_53_cast_fp16 = add(x = var_1431_cast_fp16, y = var_1437_cast_fp16)[name = string("input_53_cast_fp16")];
+            string input_55_mode_0 = const()[name = string("input_55_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 3072, 1, 1500]> input_55_cast_fp16 = gelu(mode = input_55_mode_0, x = input_53_cast_fp16)[name = string("input_55_cast_fp16")];
+            string var_1448_pad_type_0 = const()[name = string("op_1448_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1448_strides_0 = const()[name = string("op_1448_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1448_pad_0 = const()[name = string("op_1448_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1448_dilations_0 = const()[name = string("op_1448_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1448_groups_0 = const()[name = string("op_1448_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 3072, 1, 1]> layers_6_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(37861824))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(39041536))))[name = string("layers_6_fc2_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_6_fc2_inlier_module_bias_to_fp16 = const()[name = string("layers_6_fc2_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(39041664)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_1448_cast_fp16 = conv(bias = layers_6_fc2_inlier_module_bias_to_fp16, dilations = var_1448_dilations_0, groups = var_1448_groups_0, pad = var_1448_pad_0, pad_type = var_1448_pad_type_0, strides = var_1448_strides_0, weight = layers_6_fc2_inlier_module_weight_to_fp16_palettized, x = input_55_cast_fp16)[name = string("op_1448_cast_fp16")];
+            string var_1454_pad_type_0 = const()[name = string("op_1454_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1454_strides_0 = const()[name = string("op_1454_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1454_pad_0 = const()[name = string("op_1454_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1454_dilations_0 = const()[name = string("op_1454_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1454_groups_0 = const()[name = string("op_1454_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 3072, 1, 1]> layers_6_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(39079168))), nonzero_data = tensor<fp16, [17897]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(39043264))))[name = string("layers_6_fc2_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1500]> var_1454_cast_fp16 = conv(dilations = var_1454_dilations_0, groups = var_1454_groups_0, pad = var_1454_pad_0, pad_type = var_1454_pad_type_0, strides = var_1454_strides_0, weight = layers_6_fc2_outlier_module_weight_to_fp16_sparsified, x = input_55_cast_fp16)[name = string("op_1454_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> hidden_states_17_cast_fp16 = add(x = var_1448_cast_fp16, y = var_1454_cast_fp16)[name = string("hidden_states_17_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> inputs_29_cast_fp16 = add(x = inputs_27_cast_fp16, y = hidden_states_17_cast_fp16)[name = string("inputs_29_cast_fp16")];
+            int32 var_1464 = const()[name = string("op_1464"), val = int32(3)];
+            tensor<int32, [1]> out_29_axes_0 = const()[name = string("out_29_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_1483_to_fp16 = const()[name = string("op_1483_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1500]> out_29_cast_fp16 = layer_norm(axes = out_29_axes_0, epsilon = var_1483_to_fp16, x = inputs_29_cast_fp16)[name = string("out_29_cast_fp16")];
+            tensor<fp16, [768]> obj_29_gamma_0_to_fp16 = const()[name = string("obj_29_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(39374144)))];
+            tensor<fp16, [768]> obj_29_beta_0_to_fp16 = const()[name = string("obj_29_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(39375744)))];
+            fp16 obj_29_epsilon_0_to_fp16 = const()[name = string("obj_29_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1500]> obj_29_cast_fp16 = batch_norm(beta = obj_29_beta_0_to_fp16, epsilon = obj_29_epsilon_0_to_fp16, gamma = obj_29_gamma_0_to_fp16, mean = var_81_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_29_cast_fp16)[name = string("obj_29_cast_fp16")];
+            string var_1505_pad_type_0 = const()[name = string("op_1505_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1505_strides_0 = const()[name = string("op_1505_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1505_pad_0 = const()[name = string("op_1505_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1505_dilations_0 = const()[name = string("op_1505_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1505_groups_0 = const()[name = string("op_1505_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_7_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(39377344))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(39672320))))[name = string("layers_7_self_attn_q_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_7_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_7_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(39672448)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_1505_cast_fp16 = conv(bias = layers_7_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_1505_dilations_0, groups = var_1505_groups_0, pad = var_1505_pad_0, pad_type = var_1505_pad_type_0, strides = var_1505_strides_0, weight = layers_7_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_29_cast_fp16)[name = string("op_1505_cast_fp16")];
+            string var_1511_pad_type_0 = const()[name = string("op_1511_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1511_strides_0 = const()[name = string("op_1511_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1511_pad_0 = const()[name = string("op_1511_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1511_dilations_0 = const()[name = string("op_1511_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1511_groups_0 = const()[name = string("op_1511_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_7_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(39680704))), nonzero_data = tensor<fp16, [3266]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(39674048))))[name = string("layers_7_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1500]> var_1511_cast_fp16 = conv(dilations = var_1511_dilations_0, groups = var_1511_groups_0, pad = var_1511_pad_0, pad_type = var_1511_pad_type_0, strides = var_1511_strides_0, weight = layers_7_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_29_cast_fp16)[name = string("op_1511_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> query_15_cast_fp16 = add(x = var_1505_cast_fp16, y = var_1511_cast_fp16)[name = string("query_15_cast_fp16")];
+            string var_1520_pad_type_0 = const()[name = string("op_1520_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1520_strides_0 = const()[name = string("op_1520_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1520_pad_0 = const()[name = string("op_1520_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1520_dilations_0 = const()[name = string("op_1520_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1520_groups_0 = const()[name = string("op_1520_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_7_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(39754496))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40049472))))[name = string("layers_7_self_attn_k_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1, 768, 1, 1500]> var_1520_cast_fp16 = conv(dilations = var_1520_dilations_0, groups = var_1520_groups_0, pad = var_1520_pad_0, pad_type = var_1520_pad_type_0, strides = var_1520_strides_0, weight = layers_7_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_29_cast_fp16)[name = string("op_1520_cast_fp16")];
+            string var_1526_pad_type_0 = const()[name = string("op_1526_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1526_strides_0 = const()[name = string("op_1526_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1526_pad_0 = const()[name = string("op_1526_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1526_dilations_0 = const()[name = string("op_1526_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1526_groups_0 = const()[name = string("op_1526_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_7_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40056832))), nonzero_data = tensor<fp16, [3558]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40049600))))[name = string("layers_7_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1500]> var_1526_cast_fp16 = conv(dilations = var_1526_dilations_0, groups = var_1526_groups_0, pad = var_1526_pad_0, pad_type = var_1526_pad_type_0, strides = var_1526_strides_0, weight = layers_7_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_29_cast_fp16)[name = string("op_1526_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> key_15_cast_fp16 = add(x = var_1520_cast_fp16, y = var_1526_cast_fp16)[name = string("key_15_cast_fp16")];
+            string var_1536_pad_type_0 = const()[name = string("op_1536_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1536_strides_0 = const()[name = string("op_1536_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1536_pad_0 = const()[name = string("op_1536_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1536_dilations_0 = const()[name = string("op_1536_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1536_groups_0 = const()[name = string("op_1536_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_7_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40130624))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40425600))))[name = string("layers_7_self_attn_v_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_7_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_7_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40425728)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_1536_cast_fp16 = conv(bias = layers_7_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_1536_dilations_0, groups = var_1536_groups_0, pad = var_1536_pad_0, pad_type = var_1536_pad_type_0, strides = var_1536_strides_0, weight = layers_7_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_29_cast_fp16)[name = string("op_1536_cast_fp16")];
+            string var_1542_pad_type_0 = const()[name = string("op_1542_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1542_strides_0 = const()[name = string("op_1542_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1542_pad_0 = const()[name = string("op_1542_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1542_dilations_0 = const()[name = string("op_1542_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1542_groups_0 = const()[name = string("op_1542_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_7_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40433728))), nonzero_data = tensor<fp16, [3154]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40427328))))[name = string("layers_7_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1500]> var_1542_cast_fp16 = conv(dilations = var_1542_dilations_0, groups = var_1542_groups_0, pad = var_1542_pad_0, pad_type = var_1542_pad_type_0, strides = var_1542_strides_0, weight = layers_7_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_29_cast_fp16)[name = string("op_1542_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> value_15_cast_fp16 = add(x = var_1536_cast_fp16, y = var_1542_cast_fp16)[name = string("value_15_cast_fp16")];
+            tensor<int32, [4]> var_1545 = const()[name = string("op_1545"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1500]> mh_q_15_cast_fp16 = reshape(shape = var_1545, x = query_15_cast_fp16)[name = string("mh_q_15_cast_fp16")];
+            fp16 var_1547_to_fp16 = const()[name = string("op_1547_to_fp16"), val = fp16(0x1p-3)];
+            tensor<fp16, [1, 12, 64, 1500]> var_1548_cast_fp16 = mul(x = mh_q_15_cast_fp16, y = var_1547_to_fp16)[name = string("op_1548_cast_fp16")];
+            tensor<int32, [4]> var_1549 = const()[name = string("op_1549"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1500]> var_1550_cast_fp16 = reshape(shape = var_1549, x = key_15_cast_fp16)[name = string("op_1550_cast_fp16")];
+            bool mh_w_15_transpose_x_0 = const()[name = string("mh_w_15_transpose_x_0"), val = bool(true)];
+            bool mh_w_15_transpose_y_0 = const()[name = string("mh_w_15_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 12, 1500, 1500]> mh_w_15_cast_fp16 = matmul(transpose_x = mh_w_15_transpose_x_0, transpose_y = mh_w_15_transpose_y_0, x = var_1548_cast_fp16, y = var_1550_cast_fp16)[name = string("mh_w_15_cast_fp16")];
+            tensor<fp16, [1, 12, 1500, 1500]> var_1553_cast_fp16 = softmax(axis = var_1464, x = mh_w_15_cast_fp16)[name = string("op_1553_cast_fp16")];
+            tensor<int32, [4]> var_1554 = const()[name = string("op_1554"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1500]> var_1555_cast_fp16 = reshape(shape = var_1554, x = value_15_cast_fp16)[name = string("op_1555_cast_fp16")];
+            bool attn_15_transpose_x_0 = const()[name = string("attn_15_transpose_x_0"), val = bool(false)];
+            bool attn_15_transpose_y_0 = const()[name = string("attn_15_transpose_y_0"), val = bool(true)];
+            tensor<fp16, [1, 12, 64, 1500]> attn_15_cast_fp16 = matmul(transpose_x = attn_15_transpose_x_0, transpose_y = attn_15_transpose_y_0, x = var_1555_cast_fp16, y = var_1553_cast_fp16)[name = string("attn_15_cast_fp16")];
+            tensor<int32, [4]> var_1558 = const()[name = string("op_1558"), val = tensor<int32, [4]>([1, 768, 1, -1])];
+            tensor<fp16, [1, 768, 1, 1500]> input_57_cast_fp16 = reshape(shape = var_1558, x = attn_15_cast_fp16)[name = string("input_57_cast_fp16")];
+            string var_1568_pad_type_0 = const()[name = string("op_1568_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1568_strides_0 = const()[name = string("op_1568_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1568_pad_0 = const()[name = string("op_1568_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1568_dilations_0 = const()[name = string("op_1568_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1568_groups_0 = const()[name = string("op_1568_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_7_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40507520))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40802496))))[name = string("layers_7_self_attn_o_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_7_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_7_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40802624)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_1568_cast_fp16 = conv(bias = layers_7_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_1568_dilations_0, groups = var_1568_groups_0, pad = var_1568_pad_0, pad_type = var_1568_pad_type_0, strides = var_1568_strides_0, weight = layers_7_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_57_cast_fp16)[name = string("op_1568_cast_fp16")];
+            string var_1574_pad_type_0 = const()[name = string("op_1574_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1574_strides_0 = const()[name = string("op_1574_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1574_pad_0 = const()[name = string("op_1574_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1574_dilations_0 = const()[name = string("op_1574_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1574_groups_0 = const()[name = string("op_1574_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_7_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40810112))), nonzero_data = tensor<fp16, [2902]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40804224))))[name = string("layers_7_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1500]> var_1574_cast_fp16 = conv(dilations = var_1574_dilations_0, groups = var_1574_groups_0, pad = var_1574_pad_0, pad_type = var_1574_pad_type_0, strides = var_1574_strides_0, weight = layers_7_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_57_cast_fp16)[name = string("op_1574_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> obj_31_cast_fp16 = add(x = var_1568_cast_fp16, y = var_1574_cast_fp16)[name = string("obj_31_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> inputs_31_cast_fp16 = add(x = inputs_29_cast_fp16, y = obj_31_cast_fp16)[name = string("inputs_31_cast_fp16")];
+            tensor<int32, [1]> out_31_axes_0 = const()[name = string("out_31_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_1585_to_fp16 = const()[name = string("op_1585_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1500]> out_31_cast_fp16 = layer_norm(axes = out_31_axes_0, epsilon = var_1585_to_fp16, x = inputs_31_cast_fp16)[name = string("out_31_cast_fp16")];
+            tensor<fp16, [768]> input_59_gamma_0_to_fp16 = const()[name = string("input_59_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40883904)))];
+            tensor<fp16, [768]> input_59_beta_0_to_fp16 = const()[name = string("input_59_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40885504)))];
+            fp16 input_59_epsilon_0_to_fp16 = const()[name = string("input_59_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1500]> input_59_cast_fp16 = batch_norm(beta = input_59_beta_0_to_fp16, epsilon = input_59_epsilon_0_to_fp16, gamma = input_59_gamma_0_to_fp16, mean = var_81_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_31_cast_fp16)[name = string("input_59_cast_fp16")];
+            string var_1603_pad_type_0 = const()[name = string("op_1603_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1603_strides_0 = const()[name = string("op_1603_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1603_pad_0 = const()[name = string("op_1603_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1603_dilations_0 = const()[name = string("op_1603_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1603_groups_0 = const()[name = string("op_1603_groups_0"), val = int32(1)];
+            tensor<fp16, [3072, 768, 1, 1]> layers_7_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [3072, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40887104))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(42066816))))[name = string("layers_7_fc1_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [3072]> layers_7_fc1_inlier_module_bias_to_fp16 = const()[name = string("layers_7_fc1_inlier_module_bias_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(42066944)))];
+            tensor<fp16, [1, 3072, 1, 1500]> var_1603_cast_fp16 = conv(bias = layers_7_fc1_inlier_module_bias_to_fp16, dilations = var_1603_dilations_0, groups = var_1603_groups_0, pad = var_1603_pad_0, pad_type = var_1603_pad_type_0, strides = var_1603_strides_0, weight = layers_7_fc1_inlier_module_weight_to_fp16_palettized, x = input_59_cast_fp16)[name = string("op_1603_cast_fp16")];
+            string var_1609_pad_type_0 = const()[name = string("op_1609_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1609_strides_0 = const()[name = string("op_1609_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1609_pad_0 = const()[name = string("op_1609_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1609_dilations_0 = const()[name = string("op_1609_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1609_groups_0 = const()[name = string("op_1609_groups_0"), val = int32(1)];
+            tensor<fp16, [3072, 768, 1, 1]> layers_7_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [3072, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(42110976))), nonzero_data = tensor<fp16, [18849]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(42073152))))[name = string("layers_7_fc1_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 3072, 1, 1500]> var_1609_cast_fp16 = conv(dilations = var_1609_dilations_0, groups = var_1609_groups_0, pad = var_1609_pad_0, pad_type = var_1609_pad_type_0, strides = var_1609_strides_0, weight = layers_7_fc1_outlier_module_weight_to_fp16_sparsified, x = input_59_cast_fp16)[name = string("op_1609_cast_fp16")];
+            tensor<fp16, [1, 3072, 1, 1500]> input_61_cast_fp16 = add(x = var_1603_cast_fp16, y = var_1609_cast_fp16)[name = string("input_61_cast_fp16")];
+            string input_63_mode_0 = const()[name = string("input_63_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 3072, 1, 1500]> input_63_cast_fp16 = gelu(mode = input_63_mode_0, x = input_61_cast_fp16)[name = string("input_63_cast_fp16")];
+            string var_1620_pad_type_0 = const()[name = string("op_1620_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1620_strides_0 = const()[name = string("op_1620_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1620_pad_0 = const()[name = string("op_1620_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1620_dilations_0 = const()[name = string("op_1620_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1620_groups_0 = const()[name = string("op_1620_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 3072, 1, 1]> layers_7_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(42405952))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(43585664))))[name = string("layers_7_fc2_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_7_fc2_inlier_module_bias_to_fp16 = const()[name = string("layers_7_fc2_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(43585792)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_1620_cast_fp16 = conv(bias = layers_7_fc2_inlier_module_bias_to_fp16, dilations = var_1620_dilations_0, groups = var_1620_groups_0, pad = var_1620_pad_0, pad_type = var_1620_pad_type_0, strides = var_1620_strides_0, weight = layers_7_fc2_inlier_module_weight_to_fp16_palettized, x = input_63_cast_fp16)[name = string("op_1620_cast_fp16")];
+            string var_1626_pad_type_0 = const()[name = string("op_1626_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1626_strides_0 = const()[name = string("op_1626_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1626_pad_0 = const()[name = string("op_1626_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1626_dilations_0 = const()[name = string("op_1626_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1626_groups_0 = const()[name = string("op_1626_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 3072, 1, 1]> layers_7_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(43616960))), nonzero_data = tensor<fp16, [14729]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(43587392))))[name = string("layers_7_fc2_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1500]> var_1626_cast_fp16 = conv(dilations = var_1626_dilations_0, groups = var_1626_groups_0, pad = var_1626_pad_0, pad_type = var_1626_pad_type_0, strides = var_1626_strides_0, weight = layers_7_fc2_outlier_module_weight_to_fp16_sparsified, x = input_63_cast_fp16)[name = string("op_1626_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> hidden_states_19_cast_fp16 = add(x = var_1620_cast_fp16, y = var_1626_cast_fp16)[name = string("hidden_states_19_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> inputs_33_cast_fp16 = add(x = inputs_31_cast_fp16, y = hidden_states_19_cast_fp16)[name = string("inputs_33_cast_fp16")];
+            int32 var_1636 = const()[name = string("op_1636"), val = int32(3)];
+            tensor<int32, [1]> out_33_axes_0 = const()[name = string("out_33_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_1655_to_fp16 = const()[name = string("op_1655_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1500]> out_33_cast_fp16 = layer_norm(axes = out_33_axes_0, epsilon = var_1655_to_fp16, x = inputs_33_cast_fp16)[name = string("out_33_cast_fp16")];
+            tensor<fp16, [768]> obj_33_gamma_0_to_fp16 = const()[name = string("obj_33_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(43911936)))];
+            tensor<fp16, [768]> obj_33_beta_0_to_fp16 = const()[name = string("obj_33_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(43913536)))];
+            fp16 obj_33_epsilon_0_to_fp16 = const()[name = string("obj_33_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1500]> obj_33_cast_fp16 = batch_norm(beta = obj_33_beta_0_to_fp16, epsilon = obj_33_epsilon_0_to_fp16, gamma = obj_33_gamma_0_to_fp16, mean = var_81_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_33_cast_fp16)[name = string("obj_33_cast_fp16")];
+            string var_1677_pad_type_0 = const()[name = string("op_1677_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1677_strides_0 = const()[name = string("op_1677_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1677_pad_0 = const()[name = string("op_1677_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1677_dilations_0 = const()[name = string("op_1677_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1677_groups_0 = const()[name = string("op_1677_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_8_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(43915136))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(44210112))))[name = string("layers_8_self_attn_q_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_8_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_8_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(44210240)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_1677_cast_fp16 = conv(bias = layers_8_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_1677_dilations_0, groups = var_1677_groups_0, pad = var_1677_pad_0, pad_type = var_1677_pad_type_0, strides = var_1677_strides_0, weight = layers_8_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_33_cast_fp16)[name = string("op_1677_cast_fp16")];
+            string var_1683_pad_type_0 = const()[name = string("op_1683_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1683_strides_0 = const()[name = string("op_1683_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1683_pad_0 = const()[name = string("op_1683_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1683_dilations_0 = const()[name = string("op_1683_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1683_groups_0 = const()[name = string("op_1683_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_8_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(44219008))), nonzero_data = tensor<fp16, [3550]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(44211840))))[name = string("layers_8_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1500]> var_1683_cast_fp16 = conv(dilations = var_1683_dilations_0, groups = var_1683_groups_0, pad = var_1683_pad_0, pad_type = var_1683_pad_type_0, strides = var_1683_strides_0, weight = layers_8_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_33_cast_fp16)[name = string("op_1683_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> query_17_cast_fp16 = add(x = var_1677_cast_fp16, y = var_1683_cast_fp16)[name = string("query_17_cast_fp16")];
+            string var_1692_pad_type_0 = const()[name = string("op_1692_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1692_strides_0 = const()[name = string("op_1692_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1692_pad_0 = const()[name = string("op_1692_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1692_dilations_0 = const()[name = string("op_1692_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1692_groups_0 = const()[name = string("op_1692_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_8_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(44292800))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(44587776))))[name = string("layers_8_self_attn_k_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1, 768, 1, 1500]> var_1692_cast_fp16 = conv(dilations = var_1692_dilations_0, groups = var_1692_groups_0, pad = var_1692_pad_0, pad_type = var_1692_pad_type_0, strides = var_1692_strides_0, weight = layers_8_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_33_cast_fp16)[name = string("op_1692_cast_fp16")];
+            string var_1698_pad_type_0 = const()[name = string("op_1698_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1698_strides_0 = const()[name = string("op_1698_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1698_pad_0 = const()[name = string("op_1698_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1698_dilations_0 = const()[name = string("op_1698_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1698_groups_0 = const()[name = string("op_1698_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_8_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(44595136))), nonzero_data = tensor<fp16, [3567]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(44587904))))[name = string("layers_8_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1500]> var_1698_cast_fp16 = conv(dilations = var_1698_dilations_0, groups = var_1698_groups_0, pad = var_1698_pad_0, pad_type = var_1698_pad_type_0, strides = var_1698_strides_0, weight = layers_8_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_33_cast_fp16)[name = string("op_1698_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> key_17_cast_fp16 = add(x = var_1692_cast_fp16, y = var_1698_cast_fp16)[name = string("key_17_cast_fp16")];
+            string var_1708_pad_type_0 = const()[name = string("op_1708_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1708_strides_0 = const()[name = string("op_1708_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1708_pad_0 = const()[name = string("op_1708_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1708_dilations_0 = const()[name = string("op_1708_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1708_groups_0 = const()[name = string("op_1708_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_8_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(44668928))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(44963904))))[name = string("layers_8_self_attn_v_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_8_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_8_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(44964032)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_1708_cast_fp16 = conv(bias = layers_8_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_1708_dilations_0, groups = var_1708_groups_0, pad = var_1708_pad_0, pad_type = var_1708_pad_type_0, strides = var_1708_strides_0, weight = layers_8_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_33_cast_fp16)[name = string("op_1708_cast_fp16")];
+            string var_1714_pad_type_0 = const()[name = string("op_1714_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1714_strides_0 = const()[name = string("op_1714_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1714_pad_0 = const()[name = string("op_1714_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1714_dilations_0 = const()[name = string("op_1714_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1714_groups_0 = const()[name = string("op_1714_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_8_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(44971200))), nonzero_data = tensor<fp16, [2744]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(44965632))))[name = string("layers_8_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1500]> var_1714_cast_fp16 = conv(dilations = var_1714_dilations_0, groups = var_1714_groups_0, pad = var_1714_pad_0, pad_type = var_1714_pad_type_0, strides = var_1714_strides_0, weight = layers_8_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_33_cast_fp16)[name = string("op_1714_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> value_17_cast_fp16 = add(x = var_1708_cast_fp16, y = var_1714_cast_fp16)[name = string("value_17_cast_fp16")];
+            tensor<int32, [4]> var_1717 = const()[name = string("op_1717"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1500]> mh_q_17_cast_fp16 = reshape(shape = var_1717, x = query_17_cast_fp16)[name = string("mh_q_17_cast_fp16")];
+            fp16 var_1719_to_fp16 = const()[name = string("op_1719_to_fp16"), val = fp16(0x1p-3)];
+            tensor<fp16, [1, 12, 64, 1500]> var_1720_cast_fp16 = mul(x = mh_q_17_cast_fp16, y = var_1719_to_fp16)[name = string("op_1720_cast_fp16")];
+            tensor<int32, [4]> var_1721 = const()[name = string("op_1721"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1500]> var_1722_cast_fp16 = reshape(shape = var_1721, x = key_17_cast_fp16)[name = string("op_1722_cast_fp16")];
+            bool mh_w_17_transpose_x_0 = const()[name = string("mh_w_17_transpose_x_0"), val = bool(true)];
+            bool mh_w_17_transpose_y_0 = const()[name = string("mh_w_17_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 12, 1500, 1500]> mh_w_17_cast_fp16 = matmul(transpose_x = mh_w_17_transpose_x_0, transpose_y = mh_w_17_transpose_y_0, x = var_1720_cast_fp16, y = var_1722_cast_fp16)[name = string("mh_w_17_cast_fp16")];
+            tensor<fp16, [1, 12, 1500, 1500]> var_1725_cast_fp16 = softmax(axis = var_1636, x = mh_w_17_cast_fp16)[name = string("op_1725_cast_fp16")];
+            tensor<int32, [4]> var_1726 = const()[name = string("op_1726"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1500]> var_1727_cast_fp16 = reshape(shape = var_1726, x = value_17_cast_fp16)[name = string("op_1727_cast_fp16")];
+            bool attn_17_transpose_x_0 = const()[name = string("attn_17_transpose_x_0"), val = bool(false)];
+            bool attn_17_transpose_y_0 = const()[name = string("attn_17_transpose_y_0"), val = bool(true)];
+            tensor<fp16, [1, 12, 64, 1500]> attn_17_cast_fp16 = matmul(transpose_x = attn_17_transpose_x_0, transpose_y = attn_17_transpose_y_0, x = var_1727_cast_fp16, y = var_1725_cast_fp16)[name = string("attn_17_cast_fp16")];
+            tensor<int32, [4]> var_1730 = const()[name = string("op_1730"), val = tensor<int32, [4]>([1, 768, 1, -1])];
+            tensor<fp16, [1, 768, 1, 1500]> input_65_cast_fp16 = reshape(shape = var_1730, x = attn_17_cast_fp16)[name = string("input_65_cast_fp16")];
+            string var_1740_pad_type_0 = const()[name = string("op_1740_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1740_strides_0 = const()[name = string("op_1740_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1740_pad_0 = const()[name = string("op_1740_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1740_dilations_0 = const()[name = string("op_1740_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1740_groups_0 = const()[name = string("op_1740_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_8_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(45044992))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(45339968))))[name = string("layers_8_self_attn_o_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_8_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_8_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(45340096)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_1740_cast_fp16 = conv(bias = layers_8_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_1740_dilations_0, groups = var_1740_groups_0, pad = var_1740_pad_0, pad_type = var_1740_pad_type_0, strides = var_1740_strides_0, weight = layers_8_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_65_cast_fp16)[name = string("op_1740_cast_fp16")];
+            string var_1746_pad_type_0 = const()[name = string("op_1746_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1746_strides_0 = const()[name = string("op_1746_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1746_pad_0 = const()[name = string("op_1746_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1746_dilations_0 = const()[name = string("op_1746_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1746_groups_0 = const()[name = string("op_1746_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_8_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(45347776))), nonzero_data = tensor<fp16, [2992]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(45341696))))[name = string("layers_8_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1500]> var_1746_cast_fp16 = conv(dilations = var_1746_dilations_0, groups = var_1746_groups_0, pad = var_1746_pad_0, pad_type = var_1746_pad_type_0, strides = var_1746_strides_0, weight = layers_8_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_65_cast_fp16)[name = string("op_1746_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> obj_35_cast_fp16 = add(x = var_1740_cast_fp16, y = var_1746_cast_fp16)[name = string("obj_35_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> inputs_35_cast_fp16 = add(x = inputs_33_cast_fp16, y = obj_35_cast_fp16)[name = string("inputs_35_cast_fp16")];
+            tensor<int32, [1]> out_35_axes_0 = const()[name = string("out_35_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_1757_to_fp16 = const()[name = string("op_1757_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1500]> out_35_cast_fp16 = layer_norm(axes = out_35_axes_0, epsilon = var_1757_to_fp16, x = inputs_35_cast_fp16)[name = string("out_35_cast_fp16")];
+            tensor<fp16, [768]> input_67_gamma_0_to_fp16 = const()[name = string("input_67_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(45421568)))];
+            tensor<fp16, [768]> input_67_beta_0_to_fp16 = const()[name = string("input_67_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(45423168)))];
+            fp16 input_67_epsilon_0_to_fp16 = const()[name = string("input_67_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1500]> input_67_cast_fp16 = batch_norm(beta = input_67_beta_0_to_fp16, epsilon = input_67_epsilon_0_to_fp16, gamma = input_67_gamma_0_to_fp16, mean = var_81_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_35_cast_fp16)[name = string("input_67_cast_fp16")];
+            string var_1775_pad_type_0 = const()[name = string("op_1775_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1775_strides_0 = const()[name = string("op_1775_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1775_pad_0 = const()[name = string("op_1775_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1775_dilations_0 = const()[name = string("op_1775_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1775_groups_0 = const()[name = string("op_1775_groups_0"), val = int32(1)];
+            tensor<fp16, [3072, 768, 1, 1]> layers_8_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [3072, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(45424768))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(46604480))))[name = string("layers_8_fc1_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [3072]> layers_8_fc1_inlier_module_bias_to_fp16 = const()[name = string("layers_8_fc1_inlier_module_bias_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(46604608)))];
+            tensor<fp16, [1, 3072, 1, 1500]> var_1775_cast_fp16 = conv(bias = layers_8_fc1_inlier_module_bias_to_fp16, dilations = var_1775_dilations_0, groups = var_1775_groups_0, pad = var_1775_pad_0, pad_type = var_1775_pad_type_0, strides = var_1775_strides_0, weight = layers_8_fc1_inlier_module_weight_to_fp16_palettized, x = input_67_cast_fp16)[name = string("op_1775_cast_fp16")];
+            string var_1781_pad_type_0 = const()[name = string("op_1781_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1781_strides_0 = const()[name = string("op_1781_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1781_pad_0 = const()[name = string("op_1781_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1781_dilations_0 = const()[name = string("op_1781_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1781_groups_0 = const()[name = string("op_1781_groups_0"), val = int32(1)];
+            tensor<fp16, [3072, 768, 1, 1]> layers_8_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [3072, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(46641984))), nonzero_data = tensor<fp16, [15537]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(46610816))))[name = string("layers_8_fc1_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 3072, 1, 1500]> var_1781_cast_fp16 = conv(dilations = var_1781_dilations_0, groups = var_1781_groups_0, pad = var_1781_pad_0, pad_type = var_1781_pad_type_0, strides = var_1781_strides_0, weight = layers_8_fc1_outlier_module_weight_to_fp16_sparsified, x = input_67_cast_fp16)[name = string("op_1781_cast_fp16")];
+            tensor<fp16, [1, 3072, 1, 1500]> input_69_cast_fp16 = add(x = var_1775_cast_fp16, y = var_1781_cast_fp16)[name = string("input_69_cast_fp16")];
+            string input_71_mode_0 = const()[name = string("input_71_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 3072, 1, 1500]> input_71_cast_fp16 = gelu(mode = input_71_mode_0, x = input_69_cast_fp16)[name = string("input_71_cast_fp16")];
+            string var_1792_pad_type_0 = const()[name = string("op_1792_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1792_strides_0 = const()[name = string("op_1792_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1792_pad_0 = const()[name = string("op_1792_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1792_dilations_0 = const()[name = string("op_1792_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1792_groups_0 = const()[name = string("op_1792_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 3072, 1, 1]> layers_8_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(46936960))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(48116672))))[name = string("layers_8_fc2_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_8_fc2_inlier_module_bias_to_fp16 = const()[name = string("layers_8_fc2_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(48116800)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_1792_cast_fp16 = conv(bias = layers_8_fc2_inlier_module_bias_to_fp16, dilations = var_1792_dilations_0, groups = var_1792_groups_0, pad = var_1792_pad_0, pad_type = var_1792_pad_type_0, strides = var_1792_strides_0, weight = layers_8_fc2_inlier_module_weight_to_fp16_palettized, x = input_71_cast_fp16)[name = string("op_1792_cast_fp16")];
+            string var_1798_pad_type_0 = const()[name = string("op_1798_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1798_strides_0 = const()[name = string("op_1798_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1798_pad_0 = const()[name = string("op_1798_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1798_dilations_0 = const()[name = string("op_1798_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1798_groups_0 = const()[name = string("op_1798_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 3072, 1, 1]> layers_8_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(48148992))), nonzero_data = tensor<fp16, [15247]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(48118400))))[name = string("layers_8_fc2_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1500]> var_1798_cast_fp16 = conv(dilations = var_1798_dilations_0, groups = var_1798_groups_0, pad = var_1798_pad_0, pad_type = var_1798_pad_type_0, strides = var_1798_strides_0, weight = layers_8_fc2_outlier_module_weight_to_fp16_sparsified, x = input_71_cast_fp16)[name = string("op_1798_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> hidden_states_21_cast_fp16 = add(x = var_1792_cast_fp16, y = var_1798_cast_fp16)[name = string("hidden_states_21_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> inputs_37_cast_fp16 = add(x = inputs_35_cast_fp16, y = hidden_states_21_cast_fp16)[name = string("inputs_37_cast_fp16")];
+            int32 var_1808 = const()[name = string("op_1808"), val = int32(3)];
+            tensor<int32, [1]> out_37_axes_0 = const()[name = string("out_37_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_1827_to_fp16 = const()[name = string("op_1827_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1500]> out_37_cast_fp16 = layer_norm(axes = out_37_axes_0, epsilon = var_1827_to_fp16, x = inputs_37_cast_fp16)[name = string("out_37_cast_fp16")];
+            tensor<fp16, [768]> obj_37_gamma_0_to_fp16 = const()[name = string("obj_37_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(48443968)))];
+            tensor<fp16, [768]> obj_37_beta_0_to_fp16 = const()[name = string("obj_37_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(48445568)))];
+            fp16 obj_37_epsilon_0_to_fp16 = const()[name = string("obj_37_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1500]> obj_37_cast_fp16 = batch_norm(beta = obj_37_beta_0_to_fp16, epsilon = obj_37_epsilon_0_to_fp16, gamma = obj_37_gamma_0_to_fp16, mean = var_81_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_37_cast_fp16)[name = string("obj_37_cast_fp16")];
+            string var_1849_pad_type_0 = const()[name = string("op_1849_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1849_strides_0 = const()[name = string("op_1849_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1849_pad_0 = const()[name = string("op_1849_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1849_dilations_0 = const()[name = string("op_1849_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1849_groups_0 = const()[name = string("op_1849_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_9_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(48447168))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(48742144))))[name = string("layers_9_self_attn_q_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_9_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_9_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(48742272)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_1849_cast_fp16 = conv(bias = layers_9_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_1849_dilations_0, groups = var_1849_groups_0, pad = var_1849_pad_0, pad_type = var_1849_pad_type_0, strides = var_1849_strides_0, weight = layers_9_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_37_cast_fp16)[name = string("op_1849_cast_fp16")];
+            string var_1855_pad_type_0 = const()[name = string("op_1855_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1855_strides_0 = const()[name = string("op_1855_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1855_pad_0 = const()[name = string("op_1855_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1855_dilations_0 = const()[name = string("op_1855_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1855_groups_0 = const()[name = string("op_1855_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_9_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(48750080))), nonzero_data = tensor<fp16, [3064]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(48743872))))[name = string("layers_9_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1500]> var_1855_cast_fp16 = conv(dilations = var_1855_dilations_0, groups = var_1855_groups_0, pad = var_1855_pad_0, pad_type = var_1855_pad_type_0, strides = var_1855_strides_0, weight = layers_9_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_37_cast_fp16)[name = string("op_1855_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> query_19_cast_fp16 = add(x = var_1849_cast_fp16, y = var_1855_cast_fp16)[name = string("query_19_cast_fp16")];
+            string var_1864_pad_type_0 = const()[name = string("op_1864_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1864_strides_0 = const()[name = string("op_1864_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1864_pad_0 = const()[name = string("op_1864_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1864_dilations_0 = const()[name = string("op_1864_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1864_groups_0 = const()[name = string("op_1864_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_9_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(48823872))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(49118848))))[name = string("layers_9_self_attn_k_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1, 768, 1, 1500]> var_1864_cast_fp16 = conv(dilations = var_1864_dilations_0, groups = var_1864_groups_0, pad = var_1864_pad_0, pad_type = var_1864_pad_type_0, strides = var_1864_strides_0, weight = layers_9_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_37_cast_fp16)[name = string("op_1864_cast_fp16")];
+            string var_1870_pad_type_0 = const()[name = string("op_1870_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1870_strides_0 = const()[name = string("op_1870_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1870_pad_0 = const()[name = string("op_1870_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1870_dilations_0 = const()[name = string("op_1870_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1870_groups_0 = const()[name = string("op_1870_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_9_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(49125760))), nonzero_data = tensor<fp16, [3358]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(49118976))))[name = string("layers_9_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1500]> var_1870_cast_fp16 = conv(dilations = var_1870_dilations_0, groups = var_1870_groups_0, pad = var_1870_pad_0, pad_type = var_1870_pad_type_0, strides = var_1870_strides_0, weight = layers_9_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_37_cast_fp16)[name = string("op_1870_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> key_19_cast_fp16 = add(x = var_1864_cast_fp16, y = var_1870_cast_fp16)[name = string("key_19_cast_fp16")];
+            string var_1880_pad_type_0 = const()[name = string("op_1880_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1880_strides_0 = const()[name = string("op_1880_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1880_pad_0 = const()[name = string("op_1880_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1880_dilations_0 = const()[name = string("op_1880_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1880_groups_0 = const()[name = string("op_1880_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_9_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(49199552))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(49494528))))[name = string("layers_9_self_attn_v_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_9_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_9_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(49494656)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_1880_cast_fp16 = conv(bias = layers_9_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_1880_dilations_0, groups = var_1880_groups_0, pad = var_1880_pad_0, pad_type = var_1880_pad_type_0, strides = var_1880_strides_0, weight = layers_9_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_37_cast_fp16)[name = string("op_1880_cast_fp16")];
+            string var_1886_pad_type_0 = const()[name = string("op_1886_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1886_strides_0 = const()[name = string("op_1886_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1886_pad_0 = const()[name = string("op_1886_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1886_dilations_0 = const()[name = string("op_1886_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1886_groups_0 = const()[name = string("op_1886_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_9_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(49501760))), nonzero_data = tensor<fp16, [2711]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(49496256))))[name = string("layers_9_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1500]> var_1886_cast_fp16 = conv(dilations = var_1886_dilations_0, groups = var_1886_groups_0, pad = var_1886_pad_0, pad_type = var_1886_pad_type_0, strides = var_1886_strides_0, weight = layers_9_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_37_cast_fp16)[name = string("op_1886_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> value_19_cast_fp16 = add(x = var_1880_cast_fp16, y = var_1886_cast_fp16)[name = string("value_19_cast_fp16")];
+            tensor<int32, [4]> var_1889 = const()[name = string("op_1889"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1500]> mh_q_19_cast_fp16 = reshape(shape = var_1889, x = query_19_cast_fp16)[name = string("mh_q_19_cast_fp16")];
+            fp16 var_1891_to_fp16 = const()[name = string("op_1891_to_fp16"), val = fp16(0x1p-3)];
+            tensor<fp16, [1, 12, 64, 1500]> var_1892_cast_fp16 = mul(x = mh_q_19_cast_fp16, y = var_1891_to_fp16)[name = string("op_1892_cast_fp16")];
+            tensor<int32, [4]> var_1893 = const()[name = string("op_1893"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1500]> var_1894_cast_fp16 = reshape(shape = var_1893, x = key_19_cast_fp16)[name = string("op_1894_cast_fp16")];
+            bool mh_w_19_transpose_x_0 = const()[name = string("mh_w_19_transpose_x_0"), val = bool(true)];
+            bool mh_w_19_transpose_y_0 = const()[name = string("mh_w_19_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 12, 1500, 1500]> mh_w_19_cast_fp16 = matmul(transpose_x = mh_w_19_transpose_x_0, transpose_y = mh_w_19_transpose_y_0, x = var_1892_cast_fp16, y = var_1894_cast_fp16)[name = string("mh_w_19_cast_fp16")];
+            tensor<fp16, [1, 12, 1500, 1500]> var_1897_cast_fp16 = softmax(axis = var_1808, x = mh_w_19_cast_fp16)[name = string("op_1897_cast_fp16")];
+            tensor<int32, [4]> var_1898 = const()[name = string("op_1898"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1500]> var_1899_cast_fp16 = reshape(shape = var_1898, x = value_19_cast_fp16)[name = string("op_1899_cast_fp16")];
+            bool attn_19_transpose_x_0 = const()[name = string("attn_19_transpose_x_0"), val = bool(false)];
+            bool attn_19_transpose_y_0 = const()[name = string("attn_19_transpose_y_0"), val = bool(true)];
+            tensor<fp16, [1, 12, 64, 1500]> attn_19_cast_fp16 = matmul(transpose_x = attn_19_transpose_x_0, transpose_y = attn_19_transpose_y_0, x = var_1899_cast_fp16, y = var_1897_cast_fp16)[name = string("attn_19_cast_fp16")];
+            tensor<int32, [4]> var_1902 = const()[name = string("op_1902"), val = tensor<int32, [4]>([1, 768, 1, -1])];
+            tensor<fp16, [1, 768, 1, 1500]> input_73_cast_fp16 = reshape(shape = var_1902, x = attn_19_cast_fp16)[name = string("input_73_cast_fp16")];
+            string var_1912_pad_type_0 = const()[name = string("op_1912_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1912_strides_0 = const()[name = string("op_1912_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1912_pad_0 = const()[name = string("op_1912_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1912_dilations_0 = const()[name = string("op_1912_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1912_groups_0 = const()[name = string("op_1912_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_9_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(49575552))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(49870528))))[name = string("layers_9_self_attn_o_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_9_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_9_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(49870656)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_1912_cast_fp16 = conv(bias = layers_9_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_1912_dilations_0, groups = var_1912_groups_0, pad = var_1912_pad_0, pad_type = var_1912_pad_type_0, strides = var_1912_strides_0, weight = layers_9_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_73_cast_fp16)[name = string("op_1912_cast_fp16")];
+            string var_1918_pad_type_0 = const()[name = string("op_1918_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1918_strides_0 = const()[name = string("op_1918_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1918_pad_0 = const()[name = string("op_1918_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1918_dilations_0 = const()[name = string("op_1918_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1918_groups_0 = const()[name = string("op_1918_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_9_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(49877760))), nonzero_data = tensor<fp16, [2712]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(49872256))))[name = string("layers_9_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1500]> var_1918_cast_fp16 = conv(dilations = var_1918_dilations_0, groups = var_1918_groups_0, pad = var_1918_pad_0, pad_type = var_1918_pad_type_0, strides = var_1918_strides_0, weight = layers_9_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_73_cast_fp16)[name = string("op_1918_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> obj_39_cast_fp16 = add(x = var_1912_cast_fp16, y = var_1918_cast_fp16)[name = string("obj_39_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> inputs_39_cast_fp16 = add(x = inputs_37_cast_fp16, y = obj_39_cast_fp16)[name = string("inputs_39_cast_fp16")];
+            tensor<int32, [1]> out_39_axes_0 = const()[name = string("out_39_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_1929_to_fp16 = const()[name = string("op_1929_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1500]> out_39_cast_fp16 = layer_norm(axes = out_39_axes_0, epsilon = var_1929_to_fp16, x = inputs_39_cast_fp16)[name = string("out_39_cast_fp16")];
+            tensor<fp16, [768]> input_75_gamma_0_to_fp16 = const()[name = string("input_75_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(49951552)))];
+            tensor<fp16, [768]> input_75_beta_0_to_fp16 = const()[name = string("input_75_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(49953152)))];
+            fp16 input_75_epsilon_0_to_fp16 = const()[name = string("input_75_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1500]> input_75_cast_fp16 = batch_norm(beta = input_75_beta_0_to_fp16, epsilon = input_75_epsilon_0_to_fp16, gamma = input_75_gamma_0_to_fp16, mean = var_81_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_39_cast_fp16)[name = string("input_75_cast_fp16")];
+            string var_1947_pad_type_0 = const()[name = string("op_1947_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1947_strides_0 = const()[name = string("op_1947_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1947_pad_0 = const()[name = string("op_1947_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1947_dilations_0 = const()[name = string("op_1947_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1947_groups_0 = const()[name = string("op_1947_groups_0"), val = int32(1)];
+            tensor<fp16, [3072, 768, 1, 1]> layers_9_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [3072, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(49954752))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(51134464))))[name = string("layers_9_fc1_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [3072]> layers_9_fc1_inlier_module_bias_to_fp16 = const()[name = string("layers_9_fc1_inlier_module_bias_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(51134592)))];
+            tensor<fp16, [1, 3072, 1, 1500]> var_1947_cast_fp16 = conv(bias = layers_9_fc1_inlier_module_bias_to_fp16, dilations = var_1947_dilations_0, groups = var_1947_groups_0, pad = var_1947_pad_0, pad_type = var_1947_pad_type_0, strides = var_1947_strides_0, weight = layers_9_fc1_inlier_module_weight_to_fp16_palettized, x = input_75_cast_fp16)[name = string("op_1947_cast_fp16")];
+            string var_1953_pad_type_0 = const()[name = string("op_1953_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1953_strides_0 = const()[name = string("op_1953_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1953_pad_0 = const()[name = string("op_1953_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1953_dilations_0 = const()[name = string("op_1953_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1953_groups_0 = const()[name = string("op_1953_groups_0"), val = int32(1)];
+            tensor<fp16, [3072, 768, 1, 1]> layers_9_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [3072, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(51171648))), nonzero_data = tensor<fp16, [15382]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(51140800))))[name = string("layers_9_fc1_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 3072, 1, 1500]> var_1953_cast_fp16 = conv(dilations = var_1953_dilations_0, groups = var_1953_groups_0, pad = var_1953_pad_0, pad_type = var_1953_pad_type_0, strides = var_1953_strides_0, weight = layers_9_fc1_outlier_module_weight_to_fp16_sparsified, x = input_75_cast_fp16)[name = string("op_1953_cast_fp16")];
+            tensor<fp16, [1, 3072, 1, 1500]> input_77_cast_fp16 = add(x = var_1947_cast_fp16, y = var_1953_cast_fp16)[name = string("input_77_cast_fp16")];
+            string input_79_mode_0 = const()[name = string("input_79_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 3072, 1, 1500]> input_79_cast_fp16 = gelu(mode = input_79_mode_0, x = input_77_cast_fp16)[name = string("input_79_cast_fp16")];
+            string var_1964_pad_type_0 = const()[name = string("op_1964_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1964_strides_0 = const()[name = string("op_1964_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1964_pad_0 = const()[name = string("op_1964_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1964_dilations_0 = const()[name = string("op_1964_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1964_groups_0 = const()[name = string("op_1964_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 3072, 1, 1]> layers_9_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(51466624))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(52646336))))[name = string("layers_9_fc2_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_9_fc2_inlier_module_bias_to_fp16 = const()[name = string("layers_9_fc2_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(52646464)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_1964_cast_fp16 = conv(bias = layers_9_fc2_inlier_module_bias_to_fp16, dilations = var_1964_dilations_0, groups = var_1964_groups_0, pad = var_1964_pad_0, pad_type = var_1964_pad_type_0, strides = var_1964_strides_0, weight = layers_9_fc2_inlier_module_weight_to_fp16_palettized, x = input_79_cast_fp16)[name = string("op_1964_cast_fp16")];
+            string var_1970_pad_type_0 = const()[name = string("op_1970_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1970_strides_0 = const()[name = string("op_1970_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1970_pad_0 = const()[name = string("op_1970_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1970_dilations_0 = const()[name = string("op_1970_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1970_groups_0 = const()[name = string("op_1970_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 3072, 1, 1]> layers_9_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(52680576))), nonzero_data = tensor<fp16, [16208]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(52648064))))[name = string("layers_9_fc2_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1500]> var_1970_cast_fp16 = conv(dilations = var_1970_dilations_0, groups = var_1970_groups_0, pad = var_1970_pad_0, pad_type = var_1970_pad_type_0, strides = var_1970_strides_0, weight = layers_9_fc2_outlier_module_weight_to_fp16_sparsified, x = input_79_cast_fp16)[name = string("op_1970_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> hidden_states_23_cast_fp16 = add(x = var_1964_cast_fp16, y = var_1970_cast_fp16)[name = string("hidden_states_23_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> inputs_41_cast_fp16 = add(x = inputs_39_cast_fp16, y = hidden_states_23_cast_fp16)[name = string("inputs_41_cast_fp16")];
+            int32 var_1980 = const()[name = string("op_1980"), val = int32(3)];
+            tensor<int32, [1]> out_41_axes_0 = const()[name = string("out_41_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_1999_to_fp16 = const()[name = string("op_1999_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1500]> out_41_cast_fp16 = layer_norm(axes = out_41_axes_0, epsilon = var_1999_to_fp16, x = inputs_41_cast_fp16)[name = string("out_41_cast_fp16")];
+            tensor<fp16, [768]> obj_41_gamma_0_to_fp16 = const()[name = string("obj_41_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(52975552)))];
+            tensor<fp16, [768]> obj_41_beta_0_to_fp16 = const()[name = string("obj_41_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(52977152)))];
+            fp16 obj_41_epsilon_0_to_fp16 = const()[name = string("obj_41_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1500]> obj_41_cast_fp16 = batch_norm(beta = obj_41_beta_0_to_fp16, epsilon = obj_41_epsilon_0_to_fp16, gamma = obj_41_gamma_0_to_fp16, mean = var_81_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_41_cast_fp16)[name = string("obj_41_cast_fp16")];
+            string var_2021_pad_type_0 = const()[name = string("op_2021_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2021_strides_0 = const()[name = string("op_2021_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2021_pad_0 = const()[name = string("op_2021_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2021_dilations_0 = const()[name = string("op_2021_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2021_groups_0 = const()[name = string("op_2021_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_10_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(52978752))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(53273728))))[name = string("layers_10_self_attn_q_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_10_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_10_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(53273856)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_2021_cast_fp16 = conv(bias = layers_10_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_2021_dilations_0, groups = var_2021_groups_0, pad = var_2021_pad_0, pad_type = var_2021_pad_type_0, strides = var_2021_strides_0, weight = layers_10_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_41_cast_fp16)[name = string("op_2021_cast_fp16")];
+            string var_2027_pad_type_0 = const()[name = string("op_2027_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2027_strides_0 = const()[name = string("op_2027_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2027_pad_0 = const()[name = string("op_2027_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2027_dilations_0 = const()[name = string("op_2027_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2027_groups_0 = const()[name = string("op_2027_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_10_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(53281536))), nonzero_data = tensor<fp16, [3007]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(53275456))))[name = string("layers_10_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1500]> var_2027_cast_fp16 = conv(dilations = var_2027_dilations_0, groups = var_2027_groups_0, pad = var_2027_pad_0, pad_type = var_2027_pad_type_0, strides = var_2027_strides_0, weight = layers_10_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_41_cast_fp16)[name = string("op_2027_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> query_21_cast_fp16 = add(x = var_2021_cast_fp16, y = var_2027_cast_fp16)[name = string("query_21_cast_fp16")];
+            string var_2036_pad_type_0 = const()[name = string("op_2036_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2036_strides_0 = const()[name = string("op_2036_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2036_pad_0 = const()[name = string("op_2036_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2036_dilations_0 = const()[name = string("op_2036_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2036_groups_0 = const()[name = string("op_2036_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_10_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(53355328))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(53650304))))[name = string("layers_10_self_attn_k_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1, 768, 1, 1500]> var_2036_cast_fp16 = conv(dilations = var_2036_dilations_0, groups = var_2036_groups_0, pad = var_2036_pad_0, pad_type = var_2036_pad_type_0, strides = var_2036_strides_0, weight = layers_10_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_41_cast_fp16)[name = string("op_2036_cast_fp16")];
+            string var_2042_pad_type_0 = const()[name = string("op_2042_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2042_strides_0 = const()[name = string("op_2042_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2042_pad_0 = const()[name = string("op_2042_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2042_dilations_0 = const()[name = string("op_2042_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2042_groups_0 = const()[name = string("op_2042_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_10_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(53657216))), nonzero_data = tensor<fp16, [3358]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(53650432))))[name = string("layers_10_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1500]> var_2042_cast_fp16 = conv(dilations = var_2042_dilations_0, groups = var_2042_groups_0, pad = var_2042_pad_0, pad_type = var_2042_pad_type_0, strides = var_2042_strides_0, weight = layers_10_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_41_cast_fp16)[name = string("op_2042_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> key_21_cast_fp16 = add(x = var_2036_cast_fp16, y = var_2042_cast_fp16)[name = string("key_21_cast_fp16")];
+            string var_2052_pad_type_0 = const()[name = string("op_2052_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2052_strides_0 = const()[name = string("op_2052_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2052_pad_0 = const()[name = string("op_2052_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2052_dilations_0 = const()[name = string("op_2052_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2052_groups_0 = const()[name = string("op_2052_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_10_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(53731008))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(54025984))))[name = string("layers_10_self_attn_v_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_10_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_10_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(54026112)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_2052_cast_fp16 = conv(bias = layers_10_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_2052_dilations_0, groups = var_2052_groups_0, pad = var_2052_pad_0, pad_type = var_2052_pad_type_0, strides = var_2052_strides_0, weight = layers_10_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_41_cast_fp16)[name = string("op_2052_cast_fp16")];
+            string var_2058_pad_type_0 = const()[name = string("op_2058_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2058_strides_0 = const()[name = string("op_2058_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2058_pad_0 = const()[name = string("op_2058_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2058_dilations_0 = const()[name = string("op_2058_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2058_groups_0 = const()[name = string("op_2058_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_10_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(54033152))), nonzero_data = tensor<fp16, [2685]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(54027712))))[name = string("layers_10_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1500]> var_2058_cast_fp16 = conv(dilations = var_2058_dilations_0, groups = var_2058_groups_0, pad = var_2058_pad_0, pad_type = var_2058_pad_type_0, strides = var_2058_strides_0, weight = layers_10_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_41_cast_fp16)[name = string("op_2058_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> value_21_cast_fp16 = add(x = var_2052_cast_fp16, y = var_2058_cast_fp16)[name = string("value_21_cast_fp16")];
+            tensor<int32, [4]> var_2061 = const()[name = string("op_2061"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1500]> mh_q_21_cast_fp16 = reshape(shape = var_2061, x = query_21_cast_fp16)[name = string("mh_q_21_cast_fp16")];
+            fp16 var_2063_to_fp16 = const()[name = string("op_2063_to_fp16"), val = fp16(0x1p-3)];
+            tensor<fp16, [1, 12, 64, 1500]> var_2064_cast_fp16 = mul(x = mh_q_21_cast_fp16, y = var_2063_to_fp16)[name = string("op_2064_cast_fp16")];
+            tensor<int32, [4]> var_2065 = const()[name = string("op_2065"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1500]> var_2066_cast_fp16 = reshape(shape = var_2065, x = key_21_cast_fp16)[name = string("op_2066_cast_fp16")];
+            bool mh_w_21_transpose_x_0 = const()[name = string("mh_w_21_transpose_x_0"), val = bool(true)];
+            bool mh_w_21_transpose_y_0 = const()[name = string("mh_w_21_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 12, 1500, 1500]> mh_w_21_cast_fp16 = matmul(transpose_x = mh_w_21_transpose_x_0, transpose_y = mh_w_21_transpose_y_0, x = var_2064_cast_fp16, y = var_2066_cast_fp16)[name = string("mh_w_21_cast_fp16")];
+            tensor<fp16, [1, 12, 1500, 1500]> var_2069_cast_fp16 = softmax(axis = var_1980, x = mh_w_21_cast_fp16)[name = string("op_2069_cast_fp16")];
+            tensor<int32, [4]> var_2070 = const()[name = string("op_2070"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1500]> var_2071_cast_fp16 = reshape(shape = var_2070, x = value_21_cast_fp16)[name = string("op_2071_cast_fp16")];
+            bool attn_21_transpose_x_0 = const()[name = string("attn_21_transpose_x_0"), val = bool(false)];
+            bool attn_21_transpose_y_0 = const()[name = string("attn_21_transpose_y_0"), val = bool(true)];
+            tensor<fp16, [1, 12, 64, 1500]> attn_21_cast_fp16 = matmul(transpose_x = attn_21_transpose_x_0, transpose_y = attn_21_transpose_y_0, x = var_2071_cast_fp16, y = var_2069_cast_fp16)[name = string("attn_21_cast_fp16")];
+            tensor<int32, [4]> var_2074 = const()[name = string("op_2074"), val = tensor<int32, [4]>([1, 768, 1, -1])];
+            tensor<fp16, [1, 768, 1, 1500]> input_81_cast_fp16 = reshape(shape = var_2074, x = attn_21_cast_fp16)[name = string("input_81_cast_fp16")];
+            string var_2084_pad_type_0 = const()[name = string("op_2084_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2084_strides_0 = const()[name = string("op_2084_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2084_pad_0 = const()[name = string("op_2084_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2084_dilations_0 = const()[name = string("op_2084_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2084_groups_0 = const()[name = string("op_2084_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_10_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(54106944))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(54401920))))[name = string("layers_10_self_attn_o_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_10_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_10_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(54402048)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_2084_cast_fp16 = conv(bias = layers_10_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_2084_dilations_0, groups = var_2084_groups_0, pad = var_2084_pad_0, pad_type = var_2084_pad_type_0, strides = var_2084_strides_0, weight = layers_10_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_81_cast_fp16)[name = string("op_2084_cast_fp16")];
+            string var_2090_pad_type_0 = const()[name = string("op_2090_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2090_strides_0 = const()[name = string("op_2090_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2090_pad_0 = const()[name = string("op_2090_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2090_dilations_0 = const()[name = string("op_2090_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2090_groups_0 = const()[name = string("op_2090_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_10_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(54409472))), nonzero_data = tensor<fp16, [2866]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(54403648))))[name = string("layers_10_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1500]> var_2090_cast_fp16 = conv(dilations = var_2090_dilations_0, groups = var_2090_groups_0, pad = var_2090_pad_0, pad_type = var_2090_pad_type_0, strides = var_2090_strides_0, weight = layers_10_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_81_cast_fp16)[name = string("op_2090_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> obj_43_cast_fp16 = add(x = var_2084_cast_fp16, y = var_2090_cast_fp16)[name = string("obj_43_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> inputs_43_cast_fp16 = add(x = inputs_41_cast_fp16, y = obj_43_cast_fp16)[name = string("inputs_43_cast_fp16")];
+            tensor<int32, [1]> out_43_axes_0 = const()[name = string("out_43_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_2101_to_fp16 = const()[name = string("op_2101_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1500]> out_43_cast_fp16 = layer_norm(axes = out_43_axes_0, epsilon = var_2101_to_fp16, x = inputs_43_cast_fp16)[name = string("out_43_cast_fp16")];
+            tensor<fp16, [768]> input_83_gamma_0_to_fp16 = const()[name = string("input_83_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(54483264)))];
+            tensor<fp16, [768]> input_83_beta_0_to_fp16 = const()[name = string("input_83_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(54484864)))];
+            fp16 input_83_epsilon_0_to_fp16 = const()[name = string("input_83_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1500]> input_83_cast_fp16 = batch_norm(beta = input_83_beta_0_to_fp16, epsilon = input_83_epsilon_0_to_fp16, gamma = input_83_gamma_0_to_fp16, mean = var_81_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_43_cast_fp16)[name = string("input_83_cast_fp16")];
+            string var_2119_pad_type_0 = const()[name = string("op_2119_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2119_strides_0 = const()[name = string("op_2119_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2119_pad_0 = const()[name = string("op_2119_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2119_dilations_0 = const()[name = string("op_2119_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2119_groups_0 = const()[name = string("op_2119_groups_0"), val = int32(1)];
+            tensor<fp16, [3072, 768, 1, 1]> layers_10_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [3072, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(54486464))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(55666176))))[name = string("layers_10_fc1_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [3072]> layers_10_fc1_inlier_module_bias_to_fp16 = const()[name = string("layers_10_fc1_inlier_module_bias_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(55666304)))];
+            tensor<fp16, [1, 3072, 1, 1500]> var_2119_cast_fp16 = conv(bias = layers_10_fc1_inlier_module_bias_to_fp16, dilations = var_2119_dilations_0, groups = var_2119_groups_0, pad = var_2119_pad_0, pad_type = var_2119_pad_type_0, strides = var_2119_strides_0, weight = layers_10_fc1_inlier_module_weight_to_fp16_palettized, x = input_83_cast_fp16)[name = string("op_2119_cast_fp16")];
+            string var_2125_pad_type_0 = const()[name = string("op_2125_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2125_strides_0 = const()[name = string("op_2125_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2125_pad_0 = const()[name = string("op_2125_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2125_dilations_0 = const()[name = string("op_2125_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2125_groups_0 = const()[name = string("op_2125_groups_0"), val = int32(1)];
+            tensor<fp16, [3072, 768, 1, 1]> layers_10_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [3072, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(55704320))), nonzero_data = tensor<fp16, [15858]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(55672512))))[name = string("layers_10_fc1_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 3072, 1, 1500]> var_2125_cast_fp16 = conv(dilations = var_2125_dilations_0, groups = var_2125_groups_0, pad = var_2125_pad_0, pad_type = var_2125_pad_type_0, strides = var_2125_strides_0, weight = layers_10_fc1_outlier_module_weight_to_fp16_sparsified, x = input_83_cast_fp16)[name = string("op_2125_cast_fp16")];
+            tensor<fp16, [1, 3072, 1, 1500]> input_85_cast_fp16 = add(x = var_2119_cast_fp16, y = var_2125_cast_fp16)[name = string("input_85_cast_fp16")];
+            string input_87_mode_0 = const()[name = string("input_87_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 3072, 1, 1500]> input_87_cast_fp16 = gelu(mode = input_87_mode_0, x = input_85_cast_fp16)[name = string("input_87_cast_fp16")];
+            string var_2136_pad_type_0 = const()[name = string("op_2136_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2136_strides_0 = const()[name = string("op_2136_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2136_pad_0 = const()[name = string("op_2136_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2136_dilations_0 = const()[name = string("op_2136_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2136_groups_0 = const()[name = string("op_2136_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 3072, 1, 1]> layers_10_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(55999296))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(57179008))))[name = string("layers_10_fc2_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_10_fc2_inlier_module_bias_to_fp16 = const()[name = string("layers_10_fc2_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(57179136)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_2136_cast_fp16 = conv(bias = layers_10_fc2_inlier_module_bias_to_fp16, dilations = var_2136_dilations_0, groups = var_2136_groups_0, pad = var_2136_pad_0, pad_type = var_2136_pad_type_0, strides = var_2136_strides_0, weight = layers_10_fc2_inlier_module_weight_to_fp16_palettized, x = input_87_cast_fp16)[name = string("op_2136_cast_fp16")];
+            string var_2142_pad_type_0 = const()[name = string("op_2142_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2142_strides_0 = const()[name = string("op_2142_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2142_pad_0 = const()[name = string("op_2142_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2142_dilations_0 = const()[name = string("op_2142_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2142_groups_0 = const()[name = string("op_2142_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 3072, 1, 1]> layers_10_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(57215232))), nonzero_data = tensor<fp16, [17216]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(57180736))))[name = string("layers_10_fc2_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1500]> var_2142_cast_fp16 = conv(dilations = var_2142_dilations_0, groups = var_2142_groups_0, pad = var_2142_pad_0, pad_type = var_2142_pad_type_0, strides = var_2142_strides_0, weight = layers_10_fc2_outlier_module_weight_to_fp16_sparsified, x = input_87_cast_fp16)[name = string("op_2142_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> hidden_states_25_cast_fp16 = add(x = var_2136_cast_fp16, y = var_2142_cast_fp16)[name = string("hidden_states_25_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> inputs_45_cast_fp16 = add(x = inputs_43_cast_fp16, y = hidden_states_25_cast_fp16)[name = string("inputs_45_cast_fp16")];
+            int32 var_2152 = const()[name = string("op_2152"), val = int32(3)];
+            tensor<int32, [1]> out_45_axes_0 = const()[name = string("out_45_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_2171_to_fp16 = const()[name = string("op_2171_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1500]> out_45_cast_fp16 = layer_norm(axes = out_45_axes_0, epsilon = var_2171_to_fp16, x = inputs_45_cast_fp16)[name = string("out_45_cast_fp16")];
+            tensor<fp16, [768]> obj_45_gamma_0_to_fp16 = const()[name = string("obj_45_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(57510208)))];
+            tensor<fp16, [768]> obj_45_beta_0_to_fp16 = const()[name = string("obj_45_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(57511808)))];
+            fp16 obj_45_epsilon_0_to_fp16 = const()[name = string("obj_45_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1500]> obj_45_cast_fp16 = batch_norm(beta = obj_45_beta_0_to_fp16, epsilon = obj_45_epsilon_0_to_fp16, gamma = obj_45_gamma_0_to_fp16, mean = var_81_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_45_cast_fp16)[name = string("obj_45_cast_fp16")];
+            string var_2193_pad_type_0 = const()[name = string("op_2193_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2193_strides_0 = const()[name = string("op_2193_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2193_pad_0 = const()[name = string("op_2193_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2193_dilations_0 = const()[name = string("op_2193_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2193_groups_0 = const()[name = string("op_2193_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_11_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(57513408))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(57808384))))[name = string("layers_11_self_attn_q_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_11_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_11_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(57808512)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_2193_cast_fp16 = conv(bias = layers_11_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_2193_dilations_0, groups = var_2193_groups_0, pad = var_2193_pad_0, pad_type = var_2193_pad_type_0, strides = var_2193_strides_0, weight = layers_11_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_45_cast_fp16)[name = string("op_2193_cast_fp16")];
+            string var_2199_pad_type_0 = const()[name = string("op_2199_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2199_strides_0 = const()[name = string("op_2199_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2199_pad_0 = const()[name = string("op_2199_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2199_dilations_0 = const()[name = string("op_2199_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2199_groups_0 = const()[name = string("op_2199_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_11_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(57816768))), nonzero_data = tensor<fp16, [3283]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(57810112))))[name = string("layers_11_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1500]> var_2199_cast_fp16 = conv(dilations = var_2199_dilations_0, groups = var_2199_groups_0, pad = var_2199_pad_0, pad_type = var_2199_pad_type_0, strides = var_2199_strides_0, weight = layers_11_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_45_cast_fp16)[name = string("op_2199_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> query_cast_fp16 = add(x = var_2193_cast_fp16, y = var_2199_cast_fp16)[name = string("query_cast_fp16")];
+            string var_2208_pad_type_0 = const()[name = string("op_2208_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2208_strides_0 = const()[name = string("op_2208_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2208_pad_0 = const()[name = string("op_2208_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2208_dilations_0 = const()[name = string("op_2208_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2208_groups_0 = const()[name = string("op_2208_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_11_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(57890560))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(58185536))))[name = string("layers_11_self_attn_k_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1, 768, 1, 1500]> var_2208_cast_fp16 = conv(dilations = var_2208_dilations_0, groups = var_2208_groups_0, pad = var_2208_pad_0, pad_type = var_2208_pad_type_0, strides = var_2208_strides_0, weight = layers_11_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_45_cast_fp16)[name = string("op_2208_cast_fp16")];
+            string var_2214_pad_type_0 = const()[name = string("op_2214_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2214_strides_0 = const()[name = string("op_2214_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2214_pad_0 = const()[name = string("op_2214_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2214_dilations_0 = const()[name = string("op_2214_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2214_groups_0 = const()[name = string("op_2214_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_11_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(58192896))), nonzero_data = tensor<fp16, [3584]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(58185664))))[name = string("layers_11_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1500]> var_2214_cast_fp16 = conv(dilations = var_2214_dilations_0, groups = var_2214_groups_0, pad = var_2214_pad_0, pad_type = var_2214_pad_type_0, strides = var_2214_strides_0, weight = layers_11_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_45_cast_fp16)[name = string("op_2214_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> key_cast_fp16 = add(x = var_2208_cast_fp16, y = var_2214_cast_fp16)[name = string("key_cast_fp16")];
+            string var_2224_pad_type_0 = const()[name = string("op_2224_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2224_strides_0 = const()[name = string("op_2224_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2224_pad_0 = const()[name = string("op_2224_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2224_dilations_0 = const()[name = string("op_2224_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2224_groups_0 = const()[name = string("op_2224_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_11_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(58266688))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(58561664))))[name = string("layers_11_self_attn_v_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_11_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_11_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(58561792)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_2224_cast_fp16 = conv(bias = layers_11_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_2224_dilations_0, groups = var_2224_groups_0, pad = var_2224_pad_0, pad_type = var_2224_pad_type_0, strides = var_2224_strides_0, weight = layers_11_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_45_cast_fp16)[name = string("op_2224_cast_fp16")];
+            string var_2230_pad_type_0 = const()[name = string("op_2230_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2230_strides_0 = const()[name = string("op_2230_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2230_pad_0 = const()[name = string("op_2230_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2230_dilations_0 = const()[name = string("op_2230_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2230_groups_0 = const()[name = string("op_2230_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_11_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(58569728))), nonzero_data = tensor<fp16, [3113]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(58563392))))[name = string("layers_11_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1500]> var_2230_cast_fp16 = conv(dilations = var_2230_dilations_0, groups = var_2230_groups_0, pad = var_2230_pad_0, pad_type = var_2230_pad_type_0, strides = var_2230_strides_0, weight = layers_11_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_45_cast_fp16)[name = string("op_2230_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> value_cast_fp16 = add(x = var_2224_cast_fp16, y = var_2230_cast_fp16)[name = string("value_cast_fp16")];
+            tensor<int32, [4]> var_2233 = const()[name = string("op_2233"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1500]> mh_q_cast_fp16 = reshape(shape = var_2233, x = query_cast_fp16)[name = string("mh_q_cast_fp16")];
+            fp16 var_2235_to_fp16 = const()[name = string("op_2235_to_fp16"), val = fp16(0x1p-3)];
+            tensor<fp16, [1, 12, 64, 1500]> var_2236_cast_fp16 = mul(x = mh_q_cast_fp16, y = var_2235_to_fp16)[name = string("op_2236_cast_fp16")];
+            tensor<int32, [4]> var_2237 = const()[name = string("op_2237"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1500]> var_2238_cast_fp16 = reshape(shape = var_2237, x = key_cast_fp16)[name = string("op_2238_cast_fp16")];
+            bool mh_w_transpose_x_0 = const()[name = string("mh_w_transpose_x_0"), val = bool(true)];
+            bool mh_w_transpose_y_0 = const()[name = string("mh_w_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 12, 1500, 1500]> mh_w_cast_fp16 = matmul(transpose_x = mh_w_transpose_x_0, transpose_y = mh_w_transpose_y_0, x = var_2236_cast_fp16, y = var_2238_cast_fp16)[name = string("mh_w_cast_fp16")];
+            tensor<fp16, [1, 12, 1500, 1500]> var_2241_cast_fp16 = softmax(axis = var_2152, x = mh_w_cast_fp16)[name = string("op_2241_cast_fp16")];
+            tensor<int32, [4]> var_2242 = const()[name = string("op_2242"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1500]> var_2243_cast_fp16 = reshape(shape = var_2242, x = value_cast_fp16)[name = string("op_2243_cast_fp16")];
+            bool attn_transpose_x_0 = const()[name = string("attn_transpose_x_0"), val = bool(false)];
+            bool attn_transpose_y_0 = const()[name = string("attn_transpose_y_0"), val = bool(true)];
+            tensor<fp16, [1, 12, 64, 1500]> attn_cast_fp16 = matmul(transpose_x = attn_transpose_x_0, transpose_y = attn_transpose_y_0, x = var_2243_cast_fp16, y = var_2241_cast_fp16)[name = string("attn_cast_fp16")];
+            tensor<int32, [4]> var_2246 = const()[name = string("op_2246"), val = tensor<int32, [4]>([1, 768, 1, -1])];
+            tensor<fp16, [1, 768, 1, 1500]> input_89_cast_fp16 = reshape(shape = var_2246, x = attn_cast_fp16)[name = string("input_89_cast_fp16")];
+            string var_2256_pad_type_0 = const()[name = string("op_2256_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2256_strides_0 = const()[name = string("op_2256_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2256_pad_0 = const()[name = string("op_2256_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2256_dilations_0 = const()[name = string("op_2256_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2256_groups_0 = const()[name = string("op_2256_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_11_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(58643520))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(58938496))))[name = string("layers_11_self_attn_o_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_11_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_11_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(58938624)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_2256_cast_fp16 = conv(bias = layers_11_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_2256_dilations_0, groups = var_2256_groups_0, pad = var_2256_pad_0, pad_type = var_2256_pad_type_0, strides = var_2256_strides_0, weight = layers_11_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_89_cast_fp16)[name = string("op_2256_cast_fp16")];
+            string var_2262_pad_type_0 = const()[name = string("op_2262_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2262_strides_0 = const()[name = string("op_2262_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2262_pad_0 = const()[name = string("op_2262_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2262_dilations_0 = const()[name = string("op_2262_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2262_groups_0 = const()[name = string("op_2262_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_11_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(58947520))), nonzero_data = tensor<fp16, [3613]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(58940224))))[name = string("layers_11_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1500]> var_2262_cast_fp16 = conv(dilations = var_2262_dilations_0, groups = var_2262_groups_0, pad = var_2262_pad_0, pad_type = var_2262_pad_type_0, strides = var_2262_strides_0, weight = layers_11_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_89_cast_fp16)[name = string("op_2262_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> obj_cast_fp16 = add(x = var_2256_cast_fp16, y = var_2262_cast_fp16)[name = string("obj_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> inputs_47_cast_fp16 = add(x = inputs_45_cast_fp16, y = obj_cast_fp16)[name = string("inputs_47_cast_fp16")];
+            tensor<int32, [1]> out_47_axes_0 = const()[name = string("out_47_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_2273_to_fp16 = const()[name = string("op_2273_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1500]> out_47_cast_fp16 = layer_norm(axes = out_47_axes_0, epsilon = var_2273_to_fp16, x = inputs_47_cast_fp16)[name = string("out_47_cast_fp16")];
+            tensor<fp16, [768]> input_91_gamma_0_to_fp16 = const()[name = string("input_91_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(59021312)))];
+            tensor<fp16, [768]> input_91_beta_0_to_fp16 = const()[name = string("input_91_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(59022912)))];
+            fp16 input_91_epsilon_0_to_fp16 = const()[name = string("input_91_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1500]> input_91_cast_fp16 = batch_norm(beta = input_91_beta_0_to_fp16, epsilon = input_91_epsilon_0_to_fp16, gamma = input_91_gamma_0_to_fp16, mean = var_81_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_47_cast_fp16)[name = string("input_91_cast_fp16")];
+            string var_2291_pad_type_0 = const()[name = string("op_2291_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2291_strides_0 = const()[name = string("op_2291_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2291_pad_0 = const()[name = string("op_2291_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2291_dilations_0 = const()[name = string("op_2291_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2291_groups_0 = const()[name = string("op_2291_groups_0"), val = int32(1)];
+            tensor<fp16, [3072, 768, 1, 1]> layers_11_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [3072, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(59024512))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(60204224))))[name = string("layers_11_fc1_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [3072]> layers_11_fc1_inlier_module_bias_to_fp16 = const()[name = string("layers_11_fc1_inlier_module_bias_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(60204352)))];
+            tensor<fp16, [1, 3072, 1, 1500]> var_2291_cast_fp16 = conv(bias = layers_11_fc1_inlier_module_bias_to_fp16, dilations = var_2291_dilations_0, groups = var_2291_groups_0, pad = var_2291_pad_0, pad_type = var_2291_pad_type_0, strides = var_2291_strides_0, weight = layers_11_fc1_inlier_module_weight_to_fp16_palettized, x = input_91_cast_fp16)[name = string("op_2291_cast_fp16")];
+            string var_2297_pad_type_0 = const()[name = string("op_2297_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2297_strides_0 = const()[name = string("op_2297_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2297_pad_0 = const()[name = string("op_2297_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2297_dilations_0 = const()[name = string("op_2297_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2297_groups_0 = const()[name = string("op_2297_groups_0"), val = int32(1)];
+            tensor<fp16, [3072, 768, 1, 1]> layers_11_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [3072, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(60241728))), nonzero_data = tensor<fp16, [15540]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(60210560))))[name = string("layers_11_fc1_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 3072, 1, 1500]> var_2297_cast_fp16 = conv(dilations = var_2297_dilations_0, groups = var_2297_groups_0, pad = var_2297_pad_0, pad_type = var_2297_pad_type_0, strides = var_2297_strides_0, weight = layers_11_fc1_outlier_module_weight_to_fp16_sparsified, x = input_91_cast_fp16)[name = string("op_2297_cast_fp16")];
+            tensor<fp16, [1, 3072, 1, 1500]> input_93_cast_fp16 = add(x = var_2291_cast_fp16, y = var_2297_cast_fp16)[name = string("input_93_cast_fp16")];
+            string input_95_mode_0 = const()[name = string("input_95_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 3072, 1, 1500]> input_95_cast_fp16 = gelu(mode = input_95_mode_0, x = input_93_cast_fp16)[name = string("input_95_cast_fp16")];
+            string var_2308_pad_type_0 = const()[name = string("op_2308_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2308_strides_0 = const()[name = string("op_2308_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2308_pad_0 = const()[name = string("op_2308_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2308_dilations_0 = const()[name = string("op_2308_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2308_groups_0 = const()[name = string("op_2308_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 3072, 1, 1]> layers_11_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(60536704))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(61716416))))[name = string("layers_11_fc2_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_11_fc2_inlier_module_bias_to_fp16 = const()[name = string("layers_11_fc2_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(61716544)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_2308_cast_fp16 = conv(bias = layers_11_fc2_inlier_module_bias_to_fp16, dilations = var_2308_dilations_0, groups = var_2308_groups_0, pad = var_2308_pad_0, pad_type = var_2308_pad_type_0, strides = var_2308_strides_0, weight = layers_11_fc2_inlier_module_weight_to_fp16_palettized, x = input_95_cast_fp16)[name = string("op_2308_cast_fp16")];
+            string var_2314_pad_type_0 = const()[name = string("op_2314_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2314_strides_0 = const()[name = string("op_2314_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2314_pad_0 = const()[name = string("op_2314_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2314_dilations_0 = const()[name = string("op_2314_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2314_groups_0 = const()[name = string("op_2314_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 3072, 1, 1]> layers_11_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(61753984))), nonzero_data = tensor<fp16, [17871]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(61718144))))[name = string("layers_11_fc2_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1500]> var_2314_cast_fp16 = conv(dilations = var_2314_dilations_0, groups = var_2314_groups_0, pad = var_2314_pad_0, pad_type = var_2314_pad_type_0, strides = var_2314_strides_0, weight = layers_11_fc2_outlier_module_weight_to_fp16_sparsified, x = input_95_cast_fp16)[name = string("op_2314_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> hidden_states_cast_fp16 = add(x = var_2308_cast_fp16, y = var_2314_cast_fp16)[name = string("hidden_states_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> inputs_cast_fp16 = add(x = inputs_47_cast_fp16, y = hidden_states_cast_fp16)[name = string("inputs_cast_fp16")];
+            tensor<int32, [1]> out_axes_0 = const()[name = string("out_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_2329_to_fp16 = const()[name = string("op_2329_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1500]> out_cast_fp16 = layer_norm(axes = out_axes_0, epsilon = var_2329_to_fp16, x = inputs_cast_fp16)[name = string("out_cast_fp16")];
+            tensor<fp16, [768]> encoder_output_embeds_type_fp32_gamma_0_to_fp16 = const()[name = string("encoder_output_embeds_type_fp32_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(62048960)))];
+            tensor<fp16, [768]> encoder_output_embeds_type_fp32_beta_0_to_fp16 = const()[name = string("encoder_output_embeds_type_fp32_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(62050560)))];
+            fp16 encoder_output_embeds_type_fp32_epsilon_0_to_fp16 = const()[name = string("encoder_output_embeds_type_fp32_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1500]> encoder_output_embeds = batch_norm(beta = encoder_output_embeds_type_fp32_beta_0_to_fp16, epsilon = encoder_output_embeds_type_fp32_epsilon_0_to_fp16, gamma = encoder_output_embeds_type_fp32_gamma_0_to_fp16, mean = var_81_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_cast_fp16)[name = string("encoder_output_embeds_type_fp32_cast_fp16")];
+            string var_2355_pad_type_0 = const()[name = string("op_2355_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2355_strides_0 = const()[name = string("op_2355_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2355_pad_0 = const()[name = string("op_2355_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2355_dilations_0 = const()[name = string("op_2355_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2355_groups_0 = const()[name = string("op_2355_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> decoder_kv_cache_prep_0_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(62052160))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(62347136))))[name = string("decoder_kv_cache_prep_0_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1, 768, 1, 1500]> var_2355_cast_fp16 = conv(dilations = var_2355_dilations_0, groups = var_2355_groups_0, pad = var_2355_pad_0, pad_type = var_2355_pad_type_0, strides = var_2355_strides_0, weight = decoder_kv_cache_prep_0_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = encoder_output_embeds)[name = string("op_2355_cast_fp16")];
+            string var_2361_pad_type_0 = const()[name = string("op_2361_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2361_strides_0 = const()[name = string("op_2361_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2361_pad_0 = const()[name = string("op_2361_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2361_dilations_0 = const()[name = string("op_2361_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2361_groups_0 = const()[name = string("op_2361_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> decoder_kv_cache_prep_0_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(62362944))), nonzero_data = tensor<fp16, [7791]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(62347264))))[name = string("decoder_kv_cache_prep_0_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1500]> var_2361_cast_fp16 = conv(dilations = var_2361_dilations_0, groups = var_2361_groups_0, pad = var_2361_pad_0, pad_type = var_2361_pad_type_0, strides = var_2361_strides_0, weight = decoder_kv_cache_prep_0_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = encoder_output_embeds)[name = string("op_2361_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> var_2362_cast_fp16 = add(x = var_2355_cast_fp16, y = var_2361_cast_fp16)[name = string("op_2362_cast_fp16")];
+            string var_2371_pad_type_0 = const()[name = string("op_2371_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2371_strides_0 = const()[name = string("op_2371_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2371_pad_0 = const()[name = string("op_2371_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2371_dilations_0 = const()[name = string("op_2371_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2371_groups_0 = const()[name = string("op_2371_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> decoder_kv_cache_prep_0_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(62436736))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(62731712))))[name = string("decoder_kv_cache_prep_0_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> decoder_kv_cache_prep_0_encoder_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("decoder_kv_cache_prep_0_encoder_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(62731840)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_2371_cast_fp16 = conv(bias = decoder_kv_cache_prep_0_encoder_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_2371_dilations_0, groups = var_2371_groups_0, pad = var_2371_pad_0, pad_type = var_2371_pad_type_0, strides = var_2371_strides_0, weight = decoder_kv_cache_prep_0_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = encoder_output_embeds)[name = string("op_2371_cast_fp16")];
+            string var_2377_pad_type_0 = const()[name = string("op_2377_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2377_strides_0 = const()[name = string("op_2377_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2377_pad_0 = const()[name = string("op_2377_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2377_dilations_0 = const()[name = string("op_2377_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2377_groups_0 = const()[name = string("op_2377_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> decoder_kv_cache_prep_0_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(62740288))), nonzero_data = tensor<fp16, [3368]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(62733440))))[name = string("decoder_kv_cache_prep_0_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1500]> var_2377_cast_fp16 = conv(dilations = var_2377_dilations_0, groups = var_2377_groups_0, pad = var_2377_pad_0, pad_type = var_2377_pad_type_0, strides = var_2377_strides_0, weight = decoder_kv_cache_prep_0_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = encoder_output_embeds)[name = string("op_2377_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> var_2378_cast_fp16 = add(x = var_2371_cast_fp16, y = var_2377_cast_fp16)[name = string("op_2378_cast_fp16")];
+            string var_2398_pad_type_0 = const()[name = string("op_2398_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2398_strides_0 = const()[name = string("op_2398_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2398_pad_0 = const()[name = string("op_2398_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2398_dilations_0 = const()[name = string("op_2398_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2398_groups_0 = const()[name = string("op_2398_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> decoder_kv_cache_prep_1_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(62814080))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(63109056))))[name = string("decoder_kv_cache_prep_1_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1, 768, 1, 1500]> var_2398_cast_fp16 = conv(dilations = var_2398_dilations_0, groups = var_2398_groups_0, pad = var_2398_pad_0, pad_type = var_2398_pad_type_0, strides = var_2398_strides_0, weight = decoder_kv_cache_prep_1_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = encoder_output_embeds)[name = string("op_2398_cast_fp16")];
+            string var_2404_pad_type_0 = const()[name = string("op_2404_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2404_strides_0 = const()[name = string("op_2404_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2404_pad_0 = const()[name = string("op_2404_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2404_dilations_0 = const()[name = string("op_2404_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2404_groups_0 = const()[name = string("op_2404_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> decoder_kv_cache_prep_1_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(63118656))), nonzero_data = tensor<fp16, [4694]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(63109184))))[name = string("decoder_kv_cache_prep_1_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1500]> var_2404_cast_fp16 = conv(dilations = var_2404_dilations_0, groups = var_2404_groups_0, pad = var_2404_pad_0, pad_type = var_2404_pad_type_0, strides = var_2404_strides_0, weight = decoder_kv_cache_prep_1_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = encoder_output_embeds)[name = string("op_2404_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> var_2405_cast_fp16 = add(x = var_2398_cast_fp16, y = var_2404_cast_fp16)[name = string("op_2405_cast_fp16")];
+            string var_2414_pad_type_0 = const()[name = string("op_2414_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2414_strides_0 = const()[name = string("op_2414_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2414_pad_0 = const()[name = string("op_2414_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2414_dilations_0 = const()[name = string("op_2414_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2414_groups_0 = const()[name = string("op_2414_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> decoder_kv_cache_prep_1_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(63192448))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(63487424))))[name = string("decoder_kv_cache_prep_1_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> decoder_kv_cache_prep_1_encoder_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("decoder_kv_cache_prep_1_encoder_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(63487552)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_2414_cast_fp16 = conv(bias = decoder_kv_cache_prep_1_encoder_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_2414_dilations_0, groups = var_2414_groups_0, pad = var_2414_pad_0, pad_type = var_2414_pad_type_0, strides = var_2414_strides_0, weight = decoder_kv_cache_prep_1_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = encoder_output_embeds)[name = string("op_2414_cast_fp16")];
+            string var_2420_pad_type_0 = const()[name = string("op_2420_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2420_strides_0 = const()[name = string("op_2420_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2420_pad_0 = const()[name = string("op_2420_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2420_dilations_0 = const()[name = string("op_2420_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2420_groups_0 = const()[name = string("op_2420_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> decoder_kv_cache_prep_1_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(63494976))), nonzero_data = tensor<fp16, [2867]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(63489152))))[name = string("decoder_kv_cache_prep_1_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1500]> var_2420_cast_fp16 = conv(dilations = var_2420_dilations_0, groups = var_2420_groups_0, pad = var_2420_pad_0, pad_type = var_2420_pad_type_0, strides = var_2420_strides_0, weight = decoder_kv_cache_prep_1_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = encoder_output_embeds)[name = string("op_2420_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> var_2421_cast_fp16 = add(x = var_2414_cast_fp16, y = var_2420_cast_fp16)[name = string("op_2421_cast_fp16")];
+            string var_2441_pad_type_0 = const()[name = string("op_2441_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2441_strides_0 = const()[name = string("op_2441_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2441_pad_0 = const()[name = string("op_2441_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2441_dilations_0 = const()[name = string("op_2441_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2441_groups_0 = const()[name = string("op_2441_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> decoder_kv_cache_prep_2_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(63568768))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(63863744))))[name = string("decoder_kv_cache_prep_2_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1, 768, 1, 1500]> var_2441_cast_fp16 = conv(dilations = var_2441_dilations_0, groups = var_2441_groups_0, pad = var_2441_pad_0, pad_type = var_2441_pad_type_0, strides = var_2441_strides_0, weight = decoder_kv_cache_prep_2_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = encoder_output_embeds)[name = string("op_2441_cast_fp16")];
+            string var_2447_pad_type_0 = const()[name = string("op_2447_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2447_strides_0 = const()[name = string("op_2447_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2447_pad_0 = const()[name = string("op_2447_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2447_dilations_0 = const()[name = string("op_2447_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2447_groups_0 = const()[name = string("op_2447_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> decoder_kv_cache_prep_2_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(63873088))), nonzero_data = tensor<fp16, [4568]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(63863872))))[name = string("decoder_kv_cache_prep_2_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1500]> var_2447_cast_fp16 = conv(dilations = var_2447_dilations_0, groups = var_2447_groups_0, pad = var_2447_pad_0, pad_type = var_2447_pad_type_0, strides = var_2447_strides_0, weight = decoder_kv_cache_prep_2_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = encoder_output_embeds)[name = string("op_2447_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> var_2448_cast_fp16 = add(x = var_2441_cast_fp16, y = var_2447_cast_fp16)[name = string("op_2448_cast_fp16")];
+            string var_2457_pad_type_0 = const()[name = string("op_2457_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2457_strides_0 = const()[name = string("op_2457_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2457_pad_0 = const()[name = string("op_2457_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2457_dilations_0 = const()[name = string("op_2457_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2457_groups_0 = const()[name = string("op_2457_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> decoder_kv_cache_prep_2_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(63946880))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64241856))))[name = string("decoder_kv_cache_prep_2_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> decoder_kv_cache_prep_2_encoder_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("decoder_kv_cache_prep_2_encoder_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64241984)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_2457_cast_fp16 = conv(bias = decoder_kv_cache_prep_2_encoder_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_2457_dilations_0, groups = var_2457_groups_0, pad = var_2457_pad_0, pad_type = var_2457_pad_type_0, strides = var_2457_strides_0, weight = decoder_kv_cache_prep_2_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = encoder_output_embeds)[name = string("op_2457_cast_fp16")];
+            string var_2463_pad_type_0 = const()[name = string("op_2463_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2463_strides_0 = const()[name = string("op_2463_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2463_pad_0 = const()[name = string("op_2463_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2463_dilations_0 = const()[name = string("op_2463_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2463_groups_0 = const()[name = string("op_2463_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> decoder_kv_cache_prep_2_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64252480))), nonzero_data = tensor<fp16, [4387]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64243584))))[name = string("decoder_kv_cache_prep_2_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1500]> var_2463_cast_fp16 = conv(dilations = var_2463_dilations_0, groups = var_2463_groups_0, pad = var_2463_pad_0, pad_type = var_2463_pad_type_0, strides = var_2463_strides_0, weight = decoder_kv_cache_prep_2_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = encoder_output_embeds)[name = string("op_2463_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> var_2464_cast_fp16 = add(x = var_2457_cast_fp16, y = var_2463_cast_fp16)[name = string("op_2464_cast_fp16")];
+            string var_2484_pad_type_0 = const()[name = string("op_2484_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2484_strides_0 = const()[name = string("op_2484_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2484_pad_0 = const()[name = string("op_2484_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2484_dilations_0 = const()[name = string("op_2484_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2484_groups_0 = const()[name = string("op_2484_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> decoder_kv_cache_prep_3_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64326272))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64621248))))[name = string("decoder_kv_cache_prep_3_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1, 768, 1, 1500]> var_2484_cast_fp16 = conv(dilations = var_2484_dilations_0, groups = var_2484_groups_0, pad = var_2484_pad_0, pad_type = var_2484_pad_type_0, strides = var_2484_strides_0, weight = decoder_kv_cache_prep_3_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = encoder_output_embeds)[name = string("op_2484_cast_fp16")];
+            string var_2490_pad_type_0 = const()[name = string("op_2490_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2490_strides_0 = const()[name = string("op_2490_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2490_pad_0 = const()[name = string("op_2490_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2490_dilations_0 = const()[name = string("op_2490_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2490_groups_0 = const()[name = string("op_2490_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> decoder_kv_cache_prep_3_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64636864))), nonzero_data = tensor<fp16, [7698]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64621376))))[name = string("decoder_kv_cache_prep_3_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1500]> var_2490_cast_fp16 = conv(dilations = var_2490_dilations_0, groups = var_2490_groups_0, pad = var_2490_pad_0, pad_type = var_2490_pad_type_0, strides = var_2490_strides_0, weight = decoder_kv_cache_prep_3_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = encoder_output_embeds)[name = string("op_2490_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> var_2491_cast_fp16 = add(x = var_2484_cast_fp16, y = var_2490_cast_fp16)[name = string("op_2491_cast_fp16")];
+            string var_2500_pad_type_0 = const()[name = string("op_2500_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2500_strides_0 = const()[name = string("op_2500_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2500_pad_0 = const()[name = string("op_2500_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2500_dilations_0 = const()[name = string("op_2500_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2500_groups_0 = const()[name = string("op_2500_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> decoder_kv_cache_prep_3_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64710656))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(65005632))))[name = string("decoder_kv_cache_prep_3_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> decoder_kv_cache_prep_3_encoder_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("decoder_kv_cache_prep_3_encoder_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(65005760)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_2500_cast_fp16 = conv(bias = decoder_kv_cache_prep_3_encoder_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_2500_dilations_0, groups = var_2500_groups_0, pad = var_2500_pad_0, pad_type = var_2500_pad_type_0, strides = var_2500_strides_0, weight = decoder_kv_cache_prep_3_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = encoder_output_embeds)[name = string("op_2500_cast_fp16")];
+            string var_2506_pad_type_0 = const()[name = string("op_2506_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2506_strides_0 = const()[name = string("op_2506_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2506_pad_0 = const()[name = string("op_2506_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2506_dilations_0 = const()[name = string("op_2506_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2506_groups_0 = const()[name = string("op_2506_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> decoder_kv_cache_prep_3_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(65020160))), nonzero_data = tensor<fp16, [6365]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(65007360))))[name = string("decoder_kv_cache_prep_3_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1500]> var_2506_cast_fp16 = conv(dilations = var_2506_dilations_0, groups = var_2506_groups_0, pad = var_2506_pad_0, pad_type = var_2506_pad_type_0, strides = var_2506_strides_0, weight = decoder_kv_cache_prep_3_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = encoder_output_embeds)[name = string("op_2506_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> var_2507_cast_fp16 = add(x = var_2500_cast_fp16, y = var_2506_cast_fp16)[name = string("op_2507_cast_fp16")];
+            string var_2527_pad_type_0 = const()[name = string("op_2527_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2527_strides_0 = const()[name = string("op_2527_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2527_pad_0 = const()[name = string("op_2527_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2527_dilations_0 = const()[name = string("op_2527_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2527_groups_0 = const()[name = string("op_2527_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> decoder_kv_cache_prep_4_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(65093952))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(65388928))))[name = string("decoder_kv_cache_prep_4_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1, 768, 1, 1500]> var_2527_cast_fp16 = conv(dilations = var_2527_dilations_0, groups = var_2527_groups_0, pad = var_2527_pad_0, pad_type = var_2527_pad_type_0, strides = var_2527_strides_0, weight = decoder_kv_cache_prep_4_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = encoder_output_embeds)[name = string("op_2527_cast_fp16")];
+            string var_2533_pad_type_0 = const()[name = string("op_2533_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2533_strides_0 = const()[name = string("op_2533_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2533_pad_0 = const()[name = string("op_2533_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2533_dilations_0 = const()[name = string("op_2533_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2533_groups_0 = const()[name = string("op_2533_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> decoder_kv_cache_prep_4_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(65403264))), nonzero_data = tensor<fp16, [7070]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(65389056))))[name = string("decoder_kv_cache_prep_4_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1500]> var_2533_cast_fp16 = conv(dilations = var_2533_dilations_0, groups = var_2533_groups_0, pad = var_2533_pad_0, pad_type = var_2533_pad_type_0, strides = var_2533_strides_0, weight = decoder_kv_cache_prep_4_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = encoder_output_embeds)[name = string("op_2533_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> var_2534_cast_fp16 = add(x = var_2527_cast_fp16, y = var_2533_cast_fp16)[name = string("op_2534_cast_fp16")];
+            string var_2543_pad_type_0 = const()[name = string("op_2543_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2543_strides_0 = const()[name = string("op_2543_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2543_pad_0 = const()[name = string("op_2543_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2543_dilations_0 = const()[name = string("op_2543_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2543_groups_0 = const()[name = string("op_2543_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> decoder_kv_cache_prep_4_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(65477056))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(65772032))))[name = string("decoder_kv_cache_prep_4_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> decoder_kv_cache_prep_4_encoder_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("decoder_kv_cache_prep_4_encoder_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(65772160)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_2543_cast_fp16 = conv(bias = decoder_kv_cache_prep_4_encoder_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_2543_dilations_0, groups = var_2543_groups_0, pad = var_2543_pad_0, pad_type = var_2543_pad_type_0, strides = var_2543_strides_0, weight = decoder_kv_cache_prep_4_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = encoder_output_embeds)[name = string("op_2543_cast_fp16")];
+            string var_2549_pad_type_0 = const()[name = string("op_2549_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2549_strides_0 = const()[name = string("op_2549_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2549_pad_0 = const()[name = string("op_2549_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2549_dilations_0 = const()[name = string("op_2549_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2549_groups_0 = const()[name = string("op_2549_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> decoder_kv_cache_prep_4_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(65783744))), nonzero_data = tensor<fp16, [4931]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(65773760))))[name = string("decoder_kv_cache_prep_4_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1500]> var_2549_cast_fp16 = conv(dilations = var_2549_dilations_0, groups = var_2549_groups_0, pad = var_2549_pad_0, pad_type = var_2549_pad_type_0, strides = var_2549_strides_0, weight = decoder_kv_cache_prep_4_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = encoder_output_embeds)[name = string("op_2549_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> var_2550_cast_fp16 = add(x = var_2543_cast_fp16, y = var_2549_cast_fp16)[name = string("op_2550_cast_fp16")];
+            string var_2570_pad_type_0 = const()[name = string("op_2570_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2570_strides_0 = const()[name = string("op_2570_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2570_pad_0 = const()[name = string("op_2570_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2570_dilations_0 = const()[name = string("op_2570_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2570_groups_0 = const()[name = string("op_2570_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> decoder_kv_cache_prep_5_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(65857536))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(66152512))))[name = string("decoder_kv_cache_prep_5_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1, 768, 1, 1500]> var_2570_cast_fp16 = conv(dilations = var_2570_dilations_0, groups = var_2570_groups_0, pad = var_2570_pad_0, pad_type = var_2570_pad_type_0, strides = var_2570_strides_0, weight = decoder_kv_cache_prep_5_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = encoder_output_embeds)[name = string("op_2570_cast_fp16")];
+            string var_2576_pad_type_0 = const()[name = string("op_2576_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2576_strides_0 = const()[name = string("op_2576_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2576_pad_0 = const()[name = string("op_2576_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2576_dilations_0 = const()[name = string("op_2576_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2576_groups_0 = const()[name = string("op_2576_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> decoder_kv_cache_prep_5_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(66163008))), nonzero_data = tensor<fp16, [5150]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(66152640))))[name = string("decoder_kv_cache_prep_5_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1500]> var_2576_cast_fp16 = conv(dilations = var_2576_dilations_0, groups = var_2576_groups_0, pad = var_2576_pad_0, pad_type = var_2576_pad_type_0, strides = var_2576_strides_0, weight = decoder_kv_cache_prep_5_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = encoder_output_embeds)[name = string("op_2576_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> var_2577_cast_fp16 = add(x = var_2570_cast_fp16, y = var_2576_cast_fp16)[name = string("op_2577_cast_fp16")];
+            string var_2586_pad_type_0 = const()[name = string("op_2586_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2586_strides_0 = const()[name = string("op_2586_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2586_pad_0 = const()[name = string("op_2586_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2586_dilations_0 = const()[name = string("op_2586_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2586_groups_0 = const()[name = string("op_2586_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> decoder_kv_cache_prep_5_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(66236800))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(66531776))))[name = string("decoder_kv_cache_prep_5_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> decoder_kv_cache_prep_5_encoder_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("decoder_kv_cache_prep_5_encoder_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(66531904)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_2586_cast_fp16 = conv(bias = decoder_kv_cache_prep_5_encoder_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_2586_dilations_0, groups = var_2586_groups_0, pad = var_2586_pad_0, pad_type = var_2586_pad_type_0, strides = var_2586_strides_0, weight = decoder_kv_cache_prep_5_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = encoder_output_embeds)[name = string("op_2586_cast_fp16")];
+            string var_2592_pad_type_0 = const()[name = string("op_2592_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2592_strides_0 = const()[name = string("op_2592_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2592_pad_0 = const()[name = string("op_2592_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2592_dilations_0 = const()[name = string("op_2592_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2592_groups_0 = const()[name = string("op_2592_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> decoder_kv_cache_prep_5_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(66540800))), nonzero_data = tensor<fp16, [3612]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(66533504))))[name = string("decoder_kv_cache_prep_5_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1500]> var_2592_cast_fp16 = conv(dilations = var_2592_dilations_0, groups = var_2592_groups_0, pad = var_2592_pad_0, pad_type = var_2592_pad_type_0, strides = var_2592_strides_0, weight = decoder_kv_cache_prep_5_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = encoder_output_embeds)[name = string("op_2592_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> var_2593_cast_fp16 = add(x = var_2586_cast_fp16, y = var_2592_cast_fp16)[name = string("op_2593_cast_fp16")];
+            string var_2613_pad_type_0 = const()[name = string("op_2613_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2613_strides_0 = const()[name = string("op_2613_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2613_pad_0 = const()[name = string("op_2613_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2613_dilations_0 = const()[name = string("op_2613_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2613_groups_0 = const()[name = string("op_2613_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> decoder_kv_cache_prep_6_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(66614592))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(66909568))))[name = string("decoder_kv_cache_prep_6_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1, 768, 1, 1500]> var_2613_cast_fp16 = conv(dilations = var_2613_dilations_0, groups = var_2613_groups_0, pad = var_2613_pad_0, pad_type = var_2613_pad_type_0, strides = var_2613_strides_0, weight = decoder_kv_cache_prep_6_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = encoder_output_embeds)[name = string("op_2613_cast_fp16")];
+            string var_2619_pad_type_0 = const()[name = string("op_2619_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2619_strides_0 = const()[name = string("op_2619_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2619_pad_0 = const()[name = string("op_2619_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2619_dilations_0 = const()[name = string("op_2619_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2619_groups_0 = const()[name = string("op_2619_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> decoder_kv_cache_prep_6_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(66917184))), nonzero_data = tensor<fp16, [3690]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(66909696))))[name = string("decoder_kv_cache_prep_6_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1500]> var_2619_cast_fp16 = conv(dilations = var_2619_dilations_0, groups = var_2619_groups_0, pad = var_2619_pad_0, pad_type = var_2619_pad_type_0, strides = var_2619_strides_0, weight = decoder_kv_cache_prep_6_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = encoder_output_embeds)[name = string("op_2619_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> var_2620_cast_fp16 = add(x = var_2613_cast_fp16, y = var_2619_cast_fp16)[name = string("op_2620_cast_fp16")];
+            string var_2629_pad_type_0 = const()[name = string("op_2629_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2629_strides_0 = const()[name = string("op_2629_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2629_pad_0 = const()[name = string("op_2629_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2629_dilations_0 = const()[name = string("op_2629_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2629_groups_0 = const()[name = string("op_2629_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> decoder_kv_cache_prep_6_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(66990976))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(67285952))))[name = string("decoder_kv_cache_prep_6_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> decoder_kv_cache_prep_6_encoder_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("decoder_kv_cache_prep_6_encoder_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(67286080)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_2629_cast_fp16 = conv(bias = decoder_kv_cache_prep_6_encoder_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_2629_dilations_0, groups = var_2629_groups_0, pad = var_2629_pad_0, pad_type = var_2629_pad_type_0, strides = var_2629_strides_0, weight = decoder_kv_cache_prep_6_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = encoder_output_embeds)[name = string("op_2629_cast_fp16")];
+            string var_2635_pad_type_0 = const()[name = string("op_2635_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2635_strides_0 = const()[name = string("op_2635_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2635_pad_0 = const()[name = string("op_2635_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2635_dilations_0 = const()[name = string("op_2635_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2635_groups_0 = const()[name = string("op_2635_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> decoder_kv_cache_prep_6_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(67293952))), nonzero_data = tensor<fp16, [3079]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(67287680))))[name = string("decoder_kv_cache_prep_6_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1500]> var_2635_cast_fp16 = conv(dilations = var_2635_dilations_0, groups = var_2635_groups_0, pad = var_2635_pad_0, pad_type = var_2635_pad_type_0, strides = var_2635_strides_0, weight = decoder_kv_cache_prep_6_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = encoder_output_embeds)[name = string("op_2635_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> var_2636_cast_fp16 = add(x = var_2629_cast_fp16, y = var_2635_cast_fp16)[name = string("op_2636_cast_fp16")];
+            string var_2656_pad_type_0 = const()[name = string("op_2656_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2656_strides_0 = const()[name = string("op_2656_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2656_pad_0 = const()[name = string("op_2656_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2656_dilations_0 = const()[name = string("op_2656_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2656_groups_0 = const()[name = string("op_2656_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> decoder_kv_cache_prep_7_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(67367744))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(67662720))))[name = string("decoder_kv_cache_prep_7_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1, 768, 1, 1500]> var_2656_cast_fp16 = conv(dilations = var_2656_dilations_0, groups = var_2656_groups_0, pad = var_2656_pad_0, pad_type = var_2656_pad_type_0, strides = var_2656_strides_0, weight = decoder_kv_cache_prep_7_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = encoder_output_embeds)[name = string("op_2656_cast_fp16")];
+            string var_2662_pad_type_0 = const()[name = string("op_2662_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2662_strides_0 = const()[name = string("op_2662_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2662_pad_0 = const()[name = string("op_2662_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2662_dilations_0 = const()[name = string("op_2662_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2662_groups_0 = const()[name = string("op_2662_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> decoder_kv_cache_prep_7_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(67670976))), nonzero_data = tensor<fp16, [4005]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(67662848))))[name = string("decoder_kv_cache_prep_7_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1500]> var_2662_cast_fp16 = conv(dilations = var_2662_dilations_0, groups = var_2662_groups_0, pad = var_2662_pad_0, pad_type = var_2662_pad_type_0, strides = var_2662_strides_0, weight = decoder_kv_cache_prep_7_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = encoder_output_embeds)[name = string("op_2662_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> var_2663_cast_fp16 = add(x = var_2656_cast_fp16, y = var_2662_cast_fp16)[name = string("op_2663_cast_fp16")];
+            string var_2672_pad_type_0 = const()[name = string("op_2672_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2672_strides_0 = const()[name = string("op_2672_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2672_pad_0 = const()[name = string("op_2672_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2672_dilations_0 = const()[name = string("op_2672_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2672_groups_0 = const()[name = string("op_2672_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> decoder_kv_cache_prep_7_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(67744768))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(68039744))))[name = string("decoder_kv_cache_prep_7_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> decoder_kv_cache_prep_7_encoder_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("decoder_kv_cache_prep_7_encoder_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(68039872)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_2672_cast_fp16 = conv(bias = decoder_kv_cache_prep_7_encoder_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_2672_dilations_0, groups = var_2672_groups_0, pad = var_2672_pad_0, pad_type = var_2672_pad_type_0, strides = var_2672_strides_0, weight = decoder_kv_cache_prep_7_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = encoder_output_embeds)[name = string("op_2672_cast_fp16")];
+            string var_2678_pad_type_0 = const()[name = string("op_2678_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2678_strides_0 = const()[name = string("op_2678_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2678_pad_0 = const()[name = string("op_2678_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2678_dilations_0 = const()[name = string("op_2678_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2678_groups_0 = const()[name = string("op_2678_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> decoder_kv_cache_prep_7_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(68047232))), nonzero_data = tensor<fp16, [2848]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(68041472))))[name = string("decoder_kv_cache_prep_7_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1500]> var_2678_cast_fp16 = conv(dilations = var_2678_dilations_0, groups = var_2678_groups_0, pad = var_2678_pad_0, pad_type = var_2678_pad_type_0, strides = var_2678_strides_0, weight = decoder_kv_cache_prep_7_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = encoder_output_embeds)[name = string("op_2678_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> var_2679_cast_fp16 = add(x = var_2672_cast_fp16, y = var_2678_cast_fp16)[name = string("op_2679_cast_fp16")];
+            string var_2699_pad_type_0 = const()[name = string("op_2699_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2699_strides_0 = const()[name = string("op_2699_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2699_pad_0 = const()[name = string("op_2699_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2699_dilations_0 = const()[name = string("op_2699_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2699_groups_0 = const()[name = string("op_2699_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> decoder_kv_cache_prep_8_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(68121024))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(68416000))))[name = string("decoder_kv_cache_prep_8_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1, 768, 1, 1500]> var_2699_cast_fp16 = conv(dilations = var_2699_dilations_0, groups = var_2699_groups_0, pad = var_2699_pad_0, pad_type = var_2699_pad_type_0, strides = var_2699_strides_0, weight = decoder_kv_cache_prep_8_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = encoder_output_embeds)[name = string("op_2699_cast_fp16")];
+            string var_2705_pad_type_0 = const()[name = string("op_2705_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2705_strides_0 = const()[name = string("op_2705_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2705_pad_0 = const()[name = string("op_2705_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2705_dilations_0 = const()[name = string("op_2705_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2705_groups_0 = const()[name = string("op_2705_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> decoder_kv_cache_prep_8_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(68423936))), nonzero_data = tensor<fp16, [3844]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(68416128))))[name = string("decoder_kv_cache_prep_8_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1500]> var_2705_cast_fp16 = conv(dilations = var_2705_dilations_0, groups = var_2705_groups_0, pad = var_2705_pad_0, pad_type = var_2705_pad_type_0, strides = var_2705_strides_0, weight = decoder_kv_cache_prep_8_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = encoder_output_embeds)[name = string("op_2705_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> var_2706_cast_fp16 = add(x = var_2699_cast_fp16, y = var_2705_cast_fp16)[name = string("op_2706_cast_fp16")];
+            string var_2715_pad_type_0 = const()[name = string("op_2715_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2715_strides_0 = const()[name = string("op_2715_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2715_pad_0 = const()[name = string("op_2715_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2715_dilations_0 = const()[name = string("op_2715_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2715_groups_0 = const()[name = string("op_2715_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> decoder_kv_cache_prep_8_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(68497728))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(68792704))))[name = string("decoder_kv_cache_prep_8_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> decoder_kv_cache_prep_8_encoder_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("decoder_kv_cache_prep_8_encoder_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(68792832)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_2715_cast_fp16 = conv(bias = decoder_kv_cache_prep_8_encoder_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_2715_dilations_0, groups = var_2715_groups_0, pad = var_2715_pad_0, pad_type = var_2715_pad_type_0, strides = var_2715_strides_0, weight = decoder_kv_cache_prep_8_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = encoder_output_embeds)[name = string("op_2715_cast_fp16")];
+            string var_2721_pad_type_0 = const()[name = string("op_2721_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2721_strides_0 = const()[name = string("op_2721_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2721_pad_0 = const()[name = string("op_2721_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2721_dilations_0 = const()[name = string("op_2721_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2721_groups_0 = const()[name = string("op_2721_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> decoder_kv_cache_prep_8_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(68800128))), nonzero_data = tensor<fp16, [2811]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(68794432))))[name = string("decoder_kv_cache_prep_8_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1500]> var_2721_cast_fp16 = conv(dilations = var_2721_dilations_0, groups = var_2721_groups_0, pad = var_2721_pad_0, pad_type = var_2721_pad_type_0, strides = var_2721_strides_0, weight = decoder_kv_cache_prep_8_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = encoder_output_embeds)[name = string("op_2721_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> var_2722_cast_fp16 = add(x = var_2715_cast_fp16, y = var_2721_cast_fp16)[name = string("op_2722_cast_fp16")];
+            string var_2742_pad_type_0 = const()[name = string("op_2742_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2742_strides_0 = const()[name = string("op_2742_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2742_pad_0 = const()[name = string("op_2742_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2742_dilations_0 = const()[name = string("op_2742_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2742_groups_0 = const()[name = string("op_2742_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> decoder_kv_cache_prep_9_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(68873920))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(69168896))))[name = string("decoder_kv_cache_prep_9_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1, 768, 1, 1500]> var_2742_cast_fp16 = conv(dilations = var_2742_dilations_0, groups = var_2742_groups_0, pad = var_2742_pad_0, pad_type = var_2742_pad_type_0, strides = var_2742_strides_0, weight = decoder_kv_cache_prep_9_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = encoder_output_embeds)[name = string("op_2742_cast_fp16")];
+            string var_2748_pad_type_0 = const()[name = string("op_2748_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2748_strides_0 = const()[name = string("op_2748_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2748_pad_0 = const()[name = string("op_2748_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2748_dilations_0 = const()[name = string("op_2748_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2748_groups_0 = const()[name = string("op_2748_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> decoder_kv_cache_prep_9_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(69175488))), nonzero_data = tensor<fp16, [3188]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(69169024))))[name = string("decoder_kv_cache_prep_9_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1500]> var_2748_cast_fp16 = conv(dilations = var_2748_dilations_0, groups = var_2748_groups_0, pad = var_2748_pad_0, pad_type = var_2748_pad_type_0, strides = var_2748_strides_0, weight = decoder_kv_cache_prep_9_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = encoder_output_embeds)[name = string("op_2748_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> var_2749_cast_fp16 = add(x = var_2742_cast_fp16, y = var_2748_cast_fp16)[name = string("op_2749_cast_fp16")];
+            string var_2758_pad_type_0 = const()[name = string("op_2758_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2758_strides_0 = const()[name = string("op_2758_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2758_pad_0 = const()[name = string("op_2758_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2758_dilations_0 = const()[name = string("op_2758_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2758_groups_0 = const()[name = string("op_2758_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> decoder_kv_cache_prep_9_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(69249280))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(69544256))))[name = string("decoder_kv_cache_prep_9_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> decoder_kv_cache_prep_9_encoder_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("decoder_kv_cache_prep_9_encoder_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(69544384)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_2758_cast_fp16 = conv(bias = decoder_kv_cache_prep_9_encoder_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_2758_dilations_0, groups = var_2758_groups_0, pad = var_2758_pad_0, pad_type = var_2758_pad_type_0, strides = var_2758_strides_0, weight = decoder_kv_cache_prep_9_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = encoder_output_embeds)[name = string("op_2758_cast_fp16")];
+            string var_2764_pad_type_0 = const()[name = string("op_2764_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2764_strides_0 = const()[name = string("op_2764_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2764_pad_0 = const()[name = string("op_2764_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2764_dilations_0 = const()[name = string("op_2764_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2764_groups_0 = const()[name = string("op_2764_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> decoder_kv_cache_prep_9_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(69551232))), nonzero_data = tensor<fp16, [2575]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(69545984))))[name = string("decoder_kv_cache_prep_9_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1500]> var_2764_cast_fp16 = conv(dilations = var_2764_dilations_0, groups = var_2764_groups_0, pad = var_2764_pad_0, pad_type = var_2764_pad_type_0, strides = var_2764_strides_0, weight = decoder_kv_cache_prep_9_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = encoder_output_embeds)[name = string("op_2764_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> var_2765_cast_fp16 = add(x = var_2758_cast_fp16, y = var_2764_cast_fp16)[name = string("op_2765_cast_fp16")];
+            string var_2785_pad_type_0 = const()[name = string("op_2785_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2785_strides_0 = const()[name = string("op_2785_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2785_pad_0 = const()[name = string("op_2785_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2785_dilations_0 = const()[name = string("op_2785_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2785_groups_0 = const()[name = string("op_2785_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> decoder_kv_cache_prep_10_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(69625024))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(69920000))))[name = string("decoder_kv_cache_prep_10_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1, 768, 1, 1500]> var_2785_cast_fp16 = conv(dilations = var_2785_dilations_0, groups = var_2785_groups_0, pad = var_2785_pad_0, pad_type = var_2785_pad_type_0, strides = var_2785_strides_0, weight = decoder_kv_cache_prep_10_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = encoder_output_embeds)[name = string("op_2785_cast_fp16")];
+            string var_2791_pad_type_0 = const()[name = string("op_2791_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2791_strides_0 = const()[name = string("op_2791_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2791_pad_0 = const()[name = string("op_2791_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2791_dilations_0 = const()[name = string("op_2791_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2791_groups_0 = const()[name = string("op_2791_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> decoder_kv_cache_prep_10_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(69927488))), nonzero_data = tensor<fp16, [3636]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(69920128))))[name = string("decoder_kv_cache_prep_10_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1500]> var_2791_cast_fp16 = conv(dilations = var_2791_dilations_0, groups = var_2791_groups_0, pad = var_2791_pad_0, pad_type = var_2791_pad_type_0, strides = var_2791_strides_0, weight = decoder_kv_cache_prep_10_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = encoder_output_embeds)[name = string("op_2791_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> var_2792_cast_fp16 = add(x = var_2785_cast_fp16, y = var_2791_cast_fp16)[name = string("op_2792_cast_fp16")];
+            string var_2801_pad_type_0 = const()[name = string("op_2801_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2801_strides_0 = const()[name = string("op_2801_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2801_pad_0 = const()[name = string("op_2801_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2801_dilations_0 = const()[name = string("op_2801_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2801_groups_0 = const()[name = string("op_2801_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> decoder_kv_cache_prep_10_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(70001280))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(70296256))))[name = string("decoder_kv_cache_prep_10_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> decoder_kv_cache_prep_10_encoder_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("decoder_kv_cache_prep_10_encoder_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(70296384)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_2801_cast_fp16 = conv(bias = decoder_kv_cache_prep_10_encoder_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_2801_dilations_0, groups = var_2801_groups_0, pad = var_2801_pad_0, pad_type = var_2801_pad_type_0, strides = var_2801_strides_0, weight = decoder_kv_cache_prep_10_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = encoder_output_embeds)[name = string("op_2801_cast_fp16")];
+            string var_2807_pad_type_0 = const()[name = string("op_2807_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2807_strides_0 = const()[name = string("op_2807_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2807_pad_0 = const()[name = string("op_2807_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2807_dilations_0 = const()[name = string("op_2807_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2807_groups_0 = const()[name = string("op_2807_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> decoder_kv_cache_prep_10_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(70307328))), nonzero_data = tensor<fp16, [4639]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(70297984))))[name = string("decoder_kv_cache_prep_10_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1500]> var_2807_cast_fp16 = conv(dilations = var_2807_dilations_0, groups = var_2807_groups_0, pad = var_2807_pad_0, pad_type = var_2807_pad_type_0, strides = var_2807_strides_0, weight = decoder_kv_cache_prep_10_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = encoder_output_embeds)[name = string("op_2807_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> var_2808_cast_fp16 = add(x = var_2801_cast_fp16, y = var_2807_cast_fp16)[name = string("op_2808_cast_fp16")];
+            string var_2828_pad_type_0 = const()[name = string("op_2828_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2828_strides_0 = const()[name = string("op_2828_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2828_pad_0 = const()[name = string("op_2828_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2828_dilations_0 = const()[name = string("op_2828_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2828_groups_0 = const()[name = string("op_2828_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> decoder_kv_cache_prep_11_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(70381120))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(70676096))))[name = string("decoder_kv_cache_prep_11_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1, 768, 1, 1500]> var_2828_cast_fp16 = conv(dilations = var_2828_dilations_0, groups = var_2828_groups_0, pad = var_2828_pad_0, pad_type = var_2828_pad_type_0, strides = var_2828_strides_0, weight = decoder_kv_cache_prep_11_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = encoder_output_embeds)[name = string("op_2828_cast_fp16")];
+            string var_2834_pad_type_0 = const()[name = string("op_2834_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2834_strides_0 = const()[name = string("op_2834_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2834_pad_0 = const()[name = string("op_2834_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2834_dilations_0 = const()[name = string("op_2834_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2834_groups_0 = const()[name = string("op_2834_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> decoder_kv_cache_prep_11_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(70684480))), nonzero_data = tensor<fp16, [4088]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(70676224))))[name = string("decoder_kv_cache_prep_11_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1500]> var_2834_cast_fp16 = conv(dilations = var_2834_dilations_0, groups = var_2834_groups_0, pad = var_2834_pad_0, pad_type = var_2834_pad_type_0, strides = var_2834_strides_0, weight = decoder_kv_cache_prep_11_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = encoder_output_embeds)[name = string("op_2834_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> k_cast_fp16 = add(x = var_2828_cast_fp16, y = var_2834_cast_fp16)[name = string("k_cast_fp16")];
+            string var_2844_pad_type_0 = const()[name = string("op_2844_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2844_strides_0 = const()[name = string("op_2844_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2844_pad_0 = const()[name = string("op_2844_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2844_dilations_0 = const()[name = string("op_2844_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2844_groups_0 = const()[name = string("op_2844_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> decoder_kv_cache_prep_11_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(70758272))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(71053248))))[name = string("decoder_kv_cache_prep_11_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> decoder_kv_cache_prep_11_encoder_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("decoder_kv_cache_prep_11_encoder_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(71053376)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_2844_cast_fp16 = conv(bias = decoder_kv_cache_prep_11_encoder_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_2844_dilations_0, groups = var_2844_groups_0, pad = var_2844_pad_0, pad_type = var_2844_pad_type_0, strides = var_2844_strides_0, weight = decoder_kv_cache_prep_11_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = encoder_output_embeds)[name = string("op_2844_cast_fp16")];
+            string var_2850_pad_type_0 = const()[name = string("op_2850_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2850_strides_0 = const()[name = string("op_2850_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2850_pad_0 = const()[name = string("op_2850_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2850_dilations_0 = const()[name = string("op_2850_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2850_groups_0 = const()[name = string("op_2850_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> decoder_kv_cache_prep_11_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(71063808))), nonzero_data = tensor<fp16, [4356]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(71054976))))[name = string("decoder_kv_cache_prep_11_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1500]> var_2850_cast_fp16 = conv(dilations = var_2850_dilations_0, groups = var_2850_groups_0, pad = var_2850_pad_0, pad_type = var_2850_pad_type_0, strides = var_2850_strides_0, weight = decoder_kv_cache_prep_11_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = encoder_output_embeds)[name = string("op_2850_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> v_cast_fp16 = add(x = var_2844_cast_fp16, y = var_2850_cast_fp16)[name = string("v_cast_fp16")];
+            int32 var_2856 = const()[name = string("op_2856"), val = int32(0)];
+            bool input_99_interleave_0 = const()[name = string("input_99_interleave_0"), val = bool(false)];
+            tensor<fp16, [12, 768, 1, 1500]> input_99_cast_fp16 = concat(axis = var_2856, interleave = input_99_interleave_0, values = (var_2362_cast_fp16, var_2405_cast_fp16, var_2448_cast_fp16, var_2491_cast_fp16, var_2534_cast_fp16, var_2577_cast_fp16, var_2620_cast_fp16, var_2663_cast_fp16, var_2706_cast_fp16, var_2749_cast_fp16, var_2792_cast_fp16, k_cast_fp16))[name = string("input_99_cast_fp16")];
+            int32 var_2859 = const()[name = string("op_2859"), val = int32(0)];
+            bool input_interleave_0 = const()[name = string("input_interleave_0"), val = bool(false)];
+            tensor<fp16, [12, 768, 1, 1500]> input_cast_fp16 = concat(axis = var_2859, interleave = input_interleave_0, values = (var_2378_cast_fp16, var_2421_cast_fp16, var_2464_cast_fp16, var_2507_cast_fp16, var_2550_cast_fp16, var_2593_cast_fp16, var_2636_cast_fp16, var_2679_cast_fp16, var_2722_cast_fp16, var_2765_cast_fp16, var_2808_cast_fp16, v_cast_fp16))[name = string("input_cast_fp16")];
+            tensor<int32, [8]> var_2866_pad_0 = const()[name = string("op_2866_pad_0"), val = tensor<int32, [8]>([0, 0, 0, 0, 0, 0, 0, 36])];
+            string var_2866_mode_0 = const()[name = string("op_2866_mode_0"), val = string("constant")];
+            fp16 const_13_to_fp16 = const()[name = string("const_13_to_fp16"), val = fp16(0x0p+0)];
+            tensor<fp16, [12, 768, 1, 1536]> encoder_attn_key_cache = pad(constant_val = const_13_to_fp16, mode = var_2866_mode_0, pad = var_2866_pad_0, x = input_99_cast_fp16)[name = string("op_2866_cast_fp16")];
+            tensor<int32, [8]> var_2872_pad_0 = const()[name = string("op_2872_pad_0"), val = tensor<int32, [8]>([0, 0, 0, 0, 0, 0, 0, 36])];
+            string var_2872_mode_0 = const()[name = string("op_2872_mode_0"), val = string("constant")];
+            fp16 const_14_to_fp16 = const()[name = string("const_14_to_fp16"), val = fp16(0x0p+0)];
+            tensor<fp16, [12, 768, 1, 1536]> encoder_attn_value_cache = pad(constant_val = const_14_to_fp16, mode = var_2872_mode_0, pad = var_2872_pad_0, x = input_cast_fp16)[name = string("op_2872_cast_fp16")];
+        } -> (encoder_output_embeds, encoder_attn_key_cache, encoder_attn_value_cache);
+}
\ No newline at end of file
diff --git a/openai_whisper-small.en_217MB/AudioEncoder.mlmodelc/model.mlmodel b/openai_whisper-small.en_217MB/AudioEncoder.mlmodelc/model.mlmodel
new file mode 100644
index 0000000000000000000000000000000000000000..63d5f6bf5d6a716bf868481f30e158827ceffe0d
--- /dev/null
+++ b/openai_whisper-small.en_217MB/AudioEncoder.mlmodelc/model.mlmodel
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d08f8c4c63c48ed098e6536f5620273f3e661c17523e0861780a99d01a1a3749
+size 370371
diff --git a/openai_whisper-small.en_217MB/AudioEncoder.mlmodelc/weights/weight.bin b/openai_whisper-small.en_217MB/AudioEncoder.mlmodelc/weights/weight.bin
new file mode 100644
index 0000000000000000000000000000000000000000..5ff765d9338f59b96b7251883c1820302500316c
--- /dev/null
+++ b/openai_whisper-small.en_217MB/AudioEncoder.mlmodelc/weights/weight.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:265b53be8d21fd319531a81bf38db77112bfe068b7b681cbf47b26faccfbee55
+size 71137600
diff --git a/openai_whisper-small.en_217MB/MelSpectrogram.mlmodelc/analytics/coremldata.bin b/openai_whisper-small.en_217MB/MelSpectrogram.mlmodelc/analytics/coremldata.bin
new file mode 100644
index 0000000000000000000000000000000000000000..a11fbb2cd75b96eb2120a672afefa298c2ef857b
--- /dev/null
+++ b/openai_whisper-small.en_217MB/MelSpectrogram.mlmodelc/analytics/coremldata.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:efc05e563ee0c556e3f578e04be5fb67b4e7520124403f2561f39102f0f2b33d
+size 243
diff --git a/openai_whisper-small.en_217MB/MelSpectrogram.mlmodelc/coremldata.bin b/openai_whisper-small.en_217MB/MelSpectrogram.mlmodelc/coremldata.bin
new file mode 100644
index 0000000000000000000000000000000000000000..a3544b6644c1af93ca6bdabb67a1c51e80eaa552
--- /dev/null
+++ b/openai_whisper-small.en_217MB/MelSpectrogram.mlmodelc/coremldata.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e4ef11ea703011eab03287ec661f999e19c2c78cf67d531b5e6afa02e18f913d
+size 328
diff --git a/openai_whisper-small.en_217MB/MelSpectrogram.mlmodelc/metadata.json b/openai_whisper-small.en_217MB/MelSpectrogram.mlmodelc/metadata.json
new file mode 100644
index 0000000000000000000000000000000000000000..1a60dd494a857817b67d87cd920baa6824e74b61
--- /dev/null
+++ b/openai_whisper-small.en_217MB/MelSpectrogram.mlmodelc/metadata.json
@@ -0,0 +1,74 @@
+[
+  {
+    "metadataOutputVersion" : "3.0",
+    "storagePrecision" : "Float16",
+    "outputSchema" : [
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1 × 80 × 1 × 3000)",
+        "shortDescription" : "",
+        "shape" : "[1, 80, 1, 3000]",
+        "name" : "melspectrogram_features",
+        "type" : "MultiArray"
+      }
+    ],
+    "modelParameters" : [
+
+    ],
+    "specificationVersion" : 9,
+    "mlProgramOperationTypeHistogram" : {
+      "Ios18.mul" : 2,
+      "Ios18.square" : 2,
+      "Ios18.conv" : 2,
+      "Ios18.matmul" : 1,
+      "Ios18.expandDims" : 4,
+      "Ios18.sub" : 1,
+      "Ios18.log" : 1,
+      "Ios18.add" : 3,
+      "Ios18.sliceByIndex" : 1,
+      "Ios18.maximum" : 1,
+      "Ios18.squeeze" : 2,
+      "Ios18.reshape" : 2,
+      "Ios16.reduceMax" : 1,
+      "Identity" : 1,
+      "Pad" : 1
+    },
+    "computePrecision" : "Mixed (Float16, Float32, Int32)",
+    "isUpdatable" : "0",
+    "stateSchema" : [
+
+    ],
+    "availability" : {
+      "macOS" : "15.0",
+      "tvOS" : "18.0",
+      "visionOS" : "2.0",
+      "watchOS" : "11.0",
+      "iOS" : "18.0",
+      "macCatalyst" : "18.0"
+    },
+    "modelType" : {
+      "name" : "MLModelType_mlProgram"
+    },
+    "userDefinedMetadata" : {
+      "com.github.apple.coremltools.source_dialect" : "TorchScript",
+      "com.github.apple.coremltools.source" : "torch==2.5.1",
+      "com.github.apple.coremltools.version" : "8.0"
+    },
+    "inputSchema" : [
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 480000)",
+        "shortDescription" : "",
+        "shape" : "[480000]",
+        "name" : "audio",
+        "type" : "MultiArray"
+      }
+    ],
+    "generatedClassName" : "MelSpectrogram",
+    "method" : "predict"
+  }
+]
\ No newline at end of file
diff --git a/openai_whisper-small.en_217MB/MelSpectrogram.mlmodelc/model.mil b/openai_whisper-small.en_217MB/MelSpectrogram.mlmodelc/model.mil
new file mode 100644
index 0000000000000000000000000000000000000000..e76a2f7f38c466d22bd0ffdc27ef38a01dd51c37
--- /dev/null
+++ b/openai_whisper-small.en_217MB/MelSpectrogram.mlmodelc/model.mil
@@ -0,0 +1,66 @@
+program(1.3)
+[buildInfo = dict<string, string>({{"coremlc-component-MIL", "3400.43.1"}, {"coremlc-version", "3400.58.2"}, {"coremltools-component-torch", "2.5.1"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "8.0"}})]
+{
+    func main<ios18>(tensor<fp16, [480000]> audio) {
+            tensor<int32, [3]> var_10 = const()[name = string("op_10"), val = tensor<int32, [3]>([1, 1, 480000])];
+            tensor<fp16, [1, 1, 480000]> input_1_cast_fp16 = reshape(shape = var_10, x = audio)[name = string("input_1_cast_fp16")];
+            tensor<int32, [6]> input_3_pad_0 = const()[name = string("input_3_pad_0"), val = tensor<int32, [6]>([0, 0, 0, 0, 200, 200])];
+            string input_3_mode_0 = const()[name = string("input_3_mode_0"), val = string("reflect")];
+            fp16 const_1_to_fp16 = const()[name = string("const_1_to_fp16"), val = fp16(0x0p+0)];
+            tensor<fp16, [1, 1, 480400]> input_3_cast_fp16 = pad(constant_val = const_1_to_fp16, mode = input_3_mode_0, pad = input_3_pad_0, x = input_1_cast_fp16)[name = string("input_3_cast_fp16")];
+            tensor<int32, [1]> var_22 = const()[name = string("op_22"), val = tensor<int32, [1]>([480400])];
+            tensor<fp16, [480400]> input_cast_fp16 = reshape(shape = var_22, x = input_3_cast_fp16)[name = string("input_cast_fp16")];
+            tensor<int32, [1]> expand_dims_0_axes_0 = const()[name = string("expand_dims_0_axes_0"), val = tensor<int32, [1]>([0])];
+            tensor<fp16, [1, 480400]> expand_dims_0_cast_fp16 = expand_dims(axes = expand_dims_0_axes_0, x = input_cast_fp16)[name = string("expand_dims_0_cast_fp16")];
+            tensor<int32, [1]> expand_dims_3 = const()[name = string("expand_dims_3"), val = tensor<int32, [1]>([160])];
+            tensor<int32, [1]> expand_dims_4_axes_0 = const()[name = string("expand_dims_4_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 480400]> expand_dims_4_cast_fp16 = expand_dims(axes = expand_dims_4_axes_0, x = expand_dims_0_cast_fp16)[name = string("expand_dims_4_cast_fp16")];
+            string conv_0_pad_type_0 = const()[name = string("conv_0_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> conv_0_pad_0 = const()[name = string("conv_0_pad_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<int32, [1]> conv_0_dilations_0 = const()[name = string("conv_0_dilations_0"), val = tensor<int32, [1]>([1])];
+            int32 conv_0_groups_0 = const()[name = string("conv_0_groups_0"), val = int32(1)];
+            tensor<fp16, [201, 1, 400]> expand_dims_1_to_fp16 = const()[name = string("expand_dims_1_to_fp16"), val = tensor<fp16, [201, 1, 400]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64)))];
+            tensor<fp16, [1, 201, 3001]> conv_0_cast_fp16 = conv(dilations = conv_0_dilations_0, groups = conv_0_groups_0, pad = conv_0_pad_0, pad_type = conv_0_pad_type_0, strides = expand_dims_3, weight = expand_dims_1_to_fp16, x = expand_dims_4_cast_fp16)[name = string("conv_0_cast_fp16")];
+            string conv_1_pad_type_0 = const()[name = string("conv_1_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> conv_1_pad_0 = const()[name = string("conv_1_pad_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<int32, [1]> conv_1_dilations_0 = const()[name = string("conv_1_dilations_0"), val = tensor<int32, [1]>([1])];
+            int32 conv_1_groups_0 = const()[name = string("conv_1_groups_0"), val = int32(1)];
+            tensor<fp16, [201, 1, 400]> expand_dims_2_to_fp16 = const()[name = string("expand_dims_2_to_fp16"), val = tensor<fp16, [201, 1, 400]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(160960)))];
+            tensor<fp16, [1, 201, 3001]> conv_1_cast_fp16 = conv(dilations = conv_1_dilations_0, groups = conv_1_groups_0, pad = conv_1_pad_0, pad_type = conv_1_pad_type_0, strides = expand_dims_3, weight = expand_dims_2_to_fp16, x = expand_dims_4_cast_fp16)[name = string("conv_1_cast_fp16")];
+            tensor<int32, [1]> squeeze_0_axes_0 = const()[name = string("squeeze_0_axes_0"), val = tensor<int32, [1]>([0])];
+            tensor<fp16, [201, 3001]> squeeze_0_cast_fp16 = squeeze(axes = squeeze_0_axes_0, x = conv_0_cast_fp16)[name = string("squeeze_0_cast_fp16")];
+            tensor<int32, [1]> squeeze_1_axes_0 = const()[name = string("squeeze_1_axes_0"), val = tensor<int32, [1]>([0])];
+            tensor<fp16, [201, 3001]> squeeze_1_cast_fp16 = squeeze(axes = squeeze_1_axes_0, x = conv_1_cast_fp16)[name = string("squeeze_1_cast_fp16")];
+            tensor<fp16, [201, 3001]> square_0_cast_fp16 = square(x = squeeze_0_cast_fp16)[name = string("square_0_cast_fp16")];
+            tensor<fp16, [201, 3001]> square_1_cast_fp16 = square(x = squeeze_1_cast_fp16)[name = string("square_1_cast_fp16")];
+            tensor<fp16, [201, 3001]> add_1_cast_fp16 = add(x = square_0_cast_fp16, y = square_1_cast_fp16)[name = string("add_1_cast_fp16")];
+            tensor<fp16, [201, 3001]> magnitudes_1_cast_fp16 = identity(x = add_1_cast_fp16)[name = string("magnitudes_1_cast_fp16")];
+            tensor<int32, [2]> magnitudes_begin_0 = const()[name = string("magnitudes_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<int32, [2]> magnitudes_end_0 = const()[name = string("magnitudes_end_0"), val = tensor<int32, [2]>([201, 3000])];
+            tensor<bool, [2]> magnitudes_end_mask_0 = const()[name = string("magnitudes_end_mask_0"), val = tensor<bool, [2]>([true, false])];
+            tensor<fp16, [201, 3000]> magnitudes_cast_fp16 = slice_by_index(begin = magnitudes_begin_0, end = magnitudes_end_0, end_mask = magnitudes_end_mask_0, x = magnitudes_1_cast_fp16)[name = string("magnitudes_cast_fp16")];
+            bool mel_spec_1_transpose_x_0 = const()[name = string("mel_spec_1_transpose_x_0"), val = bool(false)];
+            bool mel_spec_1_transpose_y_0 = const()[name = string("mel_spec_1_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [80, 201]> mel_filters_to_fp16 = const()[name = string("mel_filters_to_fp16"), val = tensor<fp16, [80, 201]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(321856)))];
+            tensor<fp16, [80, 3000]> mel_spec_1_cast_fp16 = matmul(transpose_x = mel_spec_1_transpose_x_0, transpose_y = mel_spec_1_transpose_y_0, x = mel_filters_to_fp16, y = magnitudes_cast_fp16)[name = string("mel_spec_1_cast_fp16")];
+            fp16 var_41_to_fp16 = const()[name = string("op_41_to_fp16"), val = fp16(0x1p-24)];
+            tensor<fp16, [80, 3000]> mel_spec_cast_fp16 = add(x = mel_spec_1_cast_fp16, y = var_41_to_fp16)[name = string("mel_spec_cast_fp16")];
+            fp32 log_0_epsilon_0 = const()[name = string("log_0_epsilon_0"), val = fp32(0x1p-149)];
+            tensor<fp16, [80, 3000]> log_0_cast_fp16 = log(epsilon = log_0_epsilon_0, x = mel_spec_cast_fp16)[name = string("log_0_cast_fp16")];
+            fp16 mul_0_y_0_to_fp16 = const()[name = string("mul_0_y_0_to_fp16"), val = fp16(0x1.bccp-2)];
+            tensor<fp16, [80, 3000]> mul_0_cast_fp16 = mul(x = log_0_cast_fp16, y = mul_0_y_0_to_fp16)[name = string("mul_0_cast_fp16")];
+            bool var_44_keep_dims_0 = const()[name = string("op_44_keep_dims_0"), val = bool(false)];
+            fp16 var_44_cast_fp16 = reduce_max(keep_dims = var_44_keep_dims_0, x = mul_0_cast_fp16)[name = string("op_44_cast_fp16")];
+            fp16 var_46_to_fp16 = const()[name = string("op_46_to_fp16"), val = fp16(0x1p+3)];
+            fp16 var_47_cast_fp16 = sub(x = var_44_cast_fp16, y = var_46_to_fp16)[name = string("op_47_cast_fp16")];
+            tensor<fp16, [80, 3000]> log_spec_3_cast_fp16 = maximum(x = mul_0_cast_fp16, y = var_47_cast_fp16)[name = string("log_spec_3_cast_fp16")];
+            fp16 var_50_to_fp16 = const()[name = string("op_50_to_fp16"), val = fp16(0x1p+2)];
+            tensor<fp16, [80, 3000]> var_51_cast_fp16 = add(x = log_spec_3_cast_fp16, y = var_50_to_fp16)[name = string("op_51_cast_fp16")];
+            fp16 _inversed_log_spec_y_0_to_fp16 = const()[name = string("_inversed_log_spec_y_0_to_fp16"), val = fp16(0x1p-2)];
+            tensor<fp16, [80, 3000]> _inversed_log_spec_cast_fp16 = mul(x = var_51_cast_fp16, y = _inversed_log_spec_y_0_to_fp16)[name = string("_inversed_log_spec_cast_fp16")];
+            tensor<int32, [1]> var_55_axes_0 = const()[name = string("op_55_axes_0"), val = tensor<int32, [1]>([0])];
+            tensor<fp16, [1, 80, 3000]> var_55_cast_fp16 = expand_dims(axes = var_55_axes_0, x = _inversed_log_spec_cast_fp16)[name = string("op_55_cast_fp16")];
+            tensor<int32, [1]> var_62_axes_0 = const()[name = string("op_62_axes_0"), val = tensor<int32, [1]>([2])];
+            tensor<fp16, [1, 80, 1, 3000]> melspectrogram_features = expand_dims(axes = var_62_axes_0, x = var_55_cast_fp16)[name = string("op_62_cast_fp16")];
+        } -> (melspectrogram_features);
+}
\ No newline at end of file
diff --git a/openai_whisper-small.en_217MB/MelSpectrogram.mlmodelc/weights/weight.bin b/openai_whisper-small.en_217MB/MelSpectrogram.mlmodelc/weights/weight.bin
new file mode 100644
index 0000000000000000000000000000000000000000..f7d28ffac464e9e7086a526930f0059187de8d01
--- /dev/null
+++ b/openai_whisper-small.en_217MB/MelSpectrogram.mlmodelc/weights/weight.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:801024dbc7a89c677be1f8b285de3409e35f7d1786c9c8d9d0d6842ac57a1c83
+size 354080
diff --git a/openai_whisper-small.en_217MB/TextDecoder.mlmodelc/analytics/coremldata.bin b/openai_whisper-small.en_217MB/TextDecoder.mlmodelc/analytics/coremldata.bin
new file mode 100644
index 0000000000000000000000000000000000000000..802e5e9db88f66915e2f4c50935ebbae30b0bf09
--- /dev/null
+++ b/openai_whisper-small.en_217MB/TextDecoder.mlmodelc/analytics/coremldata.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bba8806a80559fb597de37faaa2740fa1c3a464e2941dac2cd2139dbd5ea70ff
+size 243
diff --git a/openai_whisper-small.en_217MB/TextDecoder.mlmodelc/coremldata.bin b/openai_whisper-small.en_217MB/TextDecoder.mlmodelc/coremldata.bin
new file mode 100644
index 0000000000000000000000000000000000000000..2a559681d08f2ad78842877bb0279ba2e76a65ae
--- /dev/null
+++ b/openai_whisper-small.en_217MB/TextDecoder.mlmodelc/coremldata.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7af00de6eebff972d49c192df21fdcc49dc037f8043e50e89fdd8e3831e1a8e8
+size 754
diff --git a/openai_whisper-small.en_217MB/TextDecoder.mlmodelc/metadata.json b/openai_whisper-small.en_217MB/TextDecoder.mlmodelc/metadata.json
new file mode 100644
index 0000000000000000000000000000000000000000..bf1cfca110df1c9ea346ce0f0c4723ff57fcbe98
--- /dev/null
+++ b/openai_whisper-small.en_217MB/TextDecoder.mlmodelc/metadata.json
@@ -0,0 +1,185 @@
+[
+  {
+    "metadataOutputVersion" : "3.0",
+    "storagePrecision" : "Mixed (Float16, Palettized (4 bits), Sparse)",
+    "outputSchema" : [
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1 × 1 × 51864)",
+        "shortDescription" : "",
+        "shape" : "[1, 1, 51864]",
+        "name" : "logits",
+        "type" : "MultiArray"
+      },
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1 × 9216 × 1 × 1)",
+        "shortDescription" : "",
+        "shape" : "[1, 9216, 1, 1]",
+        "name" : "key_cache_updates",
+        "type" : "MultiArray"
+      },
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1 × 9216 × 1 × 1)",
+        "shortDescription" : "",
+        "shape" : "[1, 9216, 1, 1]",
+        "name" : "value_cache_updates",
+        "type" : "MultiArray"
+      },
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1 × 1536)",
+        "shortDescription" : "",
+        "shape" : "[1, 1536]",
+        "name" : "alignment_heads_weights",
+        "type" : "MultiArray"
+      }
+    ],
+    "modelParameters" : [
+
+    ],
+    "specificationVersion" : 9,
+    "mlProgramOperationTypeHistogram" : {
+      "Ios18.expandDims" : 8,
+      "Ios18.softmax" : 24,
+      "Ios18.mul" : 48,
+      "Ios18.matmul" : 48,
+      "Ios18.batchNorm" : 37,
+      "Ios16.reduceMean" : 1,
+      "Split" : 2,
+      "Ios18.readState" : 5,
+      "Ios18.gather" : 3,
+      "Ios18.add" : 182,
+      "Ios18.layerNorm" : 37,
+      "Ios18.reshape" : 96,
+      "Ios18.constexprLutToDense" : 96,
+      "Ios18.constexprSparseToDense" : 97,
+      "Ios18.conv" : 192,
+      "Ios18.gelu" : 12,
+      "Ios18.linear" : 1,
+      "Ios18.cast" : 1,
+      "Ios18.transpose" : 1,
+      "Ios18.concat" : 3,
+      "Ios18.sliceByIndex" : 62,
+      "Ios18.squeeze" : 1
+    },
+    "computePrecision" : "Mixed (Float16, Int32, UInt16)",
+    "isUpdatable" : "0",
+    "stateSchema" : [
+      {
+        "dataType" : "Float16",
+        "isOptional" : "0",
+        "formattedType" : "State (Float16 1 × 1536)",
+        "shortDescription" : "",
+        "shape" : "[1, 1536]",
+        "name" : "encoder_attn_key_padding_mask",
+        "type" : "State"
+      },
+      {
+        "dataType" : "Float16",
+        "isOptional" : "0",
+        "formattedType" : "State (Float16 12 × 768 × 1 × 1536)",
+        "shortDescription" : "",
+        "shape" : "[12, 768, 1, 1536]",
+        "name" : "encoder_attn_key_cache",
+        "type" : "State"
+      },
+      {
+        "dataType" : "Float16",
+        "isOptional" : "0",
+        "formattedType" : "State (Float16 12 × 768 × 1 × 1536)",
+        "shortDescription" : "",
+        "shape" : "[12, 768, 1, 1536]",
+        "name" : "encoder_attn_value_cache",
+        "type" : "State"
+      },
+      {
+        "dataType" : "Float16",
+        "isOptional" : "0",
+        "formattedType" : "State (Float16 12 × 768 × 1 × 448)",
+        "shortDescription" : "",
+        "shape" : "[12, 768, 1, 448]",
+        "name" : "self_attn_key_cache",
+        "type" : "State"
+      },
+      {
+        "dataType" : "Float16",
+        "isOptional" : "0",
+        "formattedType" : "State (Float16 12 × 768 × 1 × 448)",
+        "shortDescription" : "",
+        "shape" : "[12, 768, 1, 448]",
+        "name" : "self_attn_value_cache",
+        "type" : "State"
+      }
+    ],
+    "availability" : {
+      "macOS" : "15.0",
+      "tvOS" : "18.0",
+      "visionOS" : "2.0",
+      "watchOS" : "11.0",
+      "iOS" : "18.0",
+      "macCatalyst" : "18.0"
+    },
+    "modelType" : {
+      "name" : "MLModelType_mlProgram"
+    },
+    "userDefinedMetadata" : {
+      "com.github.apple.coremltools.source_dialect" : "TorchScript",
+      "com.github.apple.coremltools.source" : "torch==2.5.1",
+      "com.github.apple.coremltools.version" : "8.0"
+    },
+    "inputSchema" : [
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Int32",
+        "formattedType" : "MultiArray (Int32 1)",
+        "shortDescription" : "",
+        "shape" : "[1]",
+        "name" : "input_ids",
+        "type" : "MultiArray"
+      },
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Int32",
+        "formattedType" : "MultiArray (Int32 1)",
+        "shortDescription" : "",
+        "shape" : "[1]",
+        "name" : "cache_length",
+        "type" : "MultiArray"
+      },
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1 × 448)",
+        "shortDescription" : "",
+        "shape" : "[1, 448]",
+        "name" : "kv_cache_update_mask",
+        "type" : "MultiArray"
+      },
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1 × 448)",
+        "shortDescription" : "",
+        "shape" : "[1, 448]",
+        "name" : "decoder_key_padding_mask",
+        "type" : "MultiArray"
+      }
+    ],
+    "generatedClassName" : "TextDecoderStateful",
+    "method" : "predict"
+  }
+]
\ No newline at end of file
diff --git a/openai_whisper-small.en_217MB/TextDecoder.mlmodelc/model.mil b/openai_whisper-small.en_217MB/TextDecoder.mlmodelc/model.mil
new file mode 100644
index 0000000000000000000000000000000000000000..b05b9c0a190c68a9406deb1e61c78fbb59134838
--- /dev/null
+++ b/openai_whisper-small.en_217MB/TextDecoder.mlmodelc/model.mil
@@ -0,0 +1,2674 @@
+program(1.3)
+[buildInfo = dict<string, string>({{"coremlc-component-MIL", "3400.43.1"}, {"coremlc-version", "3400.58.2"}})]
+{
+    func main<ios18>(tensor<int32, [1]> cache_length, tensor<fp16, [1, 448]> decoder_key_padding_mask, state<tensor<fp16, [12, 768, 1, 1536]>> encoder_attn_key_cache, state<tensor<fp16, [1, 1536]>> encoder_attn_key_padding_mask, state<tensor<fp16, [12, 768, 1, 1536]>> encoder_attn_value_cache, tensor<int32, [1]> input_ids, tensor<fp16, [1, 448]> kv_cache_update_mask, state<tensor<fp16, [12, 768, 1, 448]>> self_attn_key_cache, state<tensor<fp16, [12, 768, 1, 448]>> self_attn_value_cache) {
+            int32 var_42_axis_0 = const()[name = string("op_42_axis_0"), val = int32(0)];
+            int32 var_42_batch_dims_0 = const()[name = string("op_42_batch_dims_0"), val = int32(0)];
+            bool var_42_validate_indices_0 = const()[name = string("op_42_validate_indices_0"), val = bool(false)];
+            tensor<fp16, [51864, 768]> embed_tokens_weight_to_fp16 = const()[name = string("embed_tokens_weight_to_fp16"), val = tensor<fp16, [51864, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64)))];
+            tensor<fp16, [1, 768]> var_42_cast_fp16 = gather(axis = var_42_axis_0, batch_dims = var_42_batch_dims_0, indices = input_ids, validate_indices = var_42_validate_indices_0, x = embed_tokens_weight_to_fp16)[name = string("op_42_cast_fp16")];
+            int32 var_49_axis_0 = const()[name = string("op_49_axis_0"), val = int32(0)];
+            int32 var_49_batch_dims_0 = const()[name = string("op_49_batch_dims_0"), val = int32(0)];
+            bool var_49_validate_indices_0 = const()[name = string("op_49_validate_indices_0"), val = bool(false)];
+            tensor<fp16, [448, 768]> embed_positions_inlier_module_weight_to_fp16 = const()[name = string("embed_positions_inlier_module_weight_to_fp16"), val = tensor<fp16, [448, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(79663232)))];
+            string cache_length_to_uint16_dtype_0 = const()[name = string("cache_length_to_uint16_dtype_0"), val = string("uint16")];
+            tensor<uint16, [1]> cache_length_to_uint16 = cast(dtype = cache_length_to_uint16_dtype_0, x = cache_length)[name = string("cast_0")];
+            tensor<fp16, [1, 768]> var_49_cast_fp16_cast_uint16 = gather(axis = var_49_axis_0, batch_dims = var_49_batch_dims_0, indices = cache_length_to_uint16, validate_indices = var_49_validate_indices_0, x = embed_positions_inlier_module_weight_to_fp16)[name = string("op_49_cast_fp16_cast_uint16")];
+            int32 var_51_axis_0 = const()[name = string("op_51_axis_0"), val = int32(0)];
+            int32 var_51_batch_dims_0 = const()[name = string("op_51_batch_dims_0"), val = int32(0)];
+            bool var_51_validate_indices_0 = const()[name = string("op_51_validate_indices_0"), val = bool(false)];
+            tensor<fp16, [448, 768]> embed_positions_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [448, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(80356800))), nonzero_data = tensor<fp16, [2652]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(80351424))))[name = string("embed_positions_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768]> var_51_cast_fp16_cast_uint16 = gather(axis = var_51_axis_0, batch_dims = var_51_batch_dims_0, indices = cache_length_to_uint16, validate_indices = var_51_validate_indices_0, x = embed_positions_outlier_module_weight_to_fp16_sparsified)[name = string("op_51_cast_fp16_cast_uint16")];
+            tensor<fp16, [1, 768]> var_52_cast_fp16 = add(x = var_49_cast_fp16_cast_uint16, y = var_51_cast_fp16_cast_uint16)[name = string("op_52_cast_fp16")];
+            tensor<fp16, [1, 768]> hidden_states_1_cast_fp16 = add(x = var_42_cast_fp16, y = var_52_cast_fp16)[name = string("hidden_states_1_cast_fp16")];
+            tensor<int32, [1]> var_66_axes_0 = const()[name = string("op_66_axes_0"), val = tensor<int32, [1]>([2])];
+            tensor<fp16, [1, 768, 1]> var_66_cast_fp16 = expand_dims(axes = var_66_axes_0, x = hidden_states_1_cast_fp16)[name = string("op_66_cast_fp16")];
+            tensor<int32, [1]> inputs_1_axes_0 = const()[name = string("inputs_1_axes_0"), val = tensor<int32, [1]>([3])];
+            tensor<fp16, [1, 768, 1, 1]> inputs_1_cast_fp16 = expand_dims(axes = inputs_1_axes_0, x = var_66_cast_fp16)[name = string("inputs_1_cast_fp16")];
+            tensor<fp16, [12, 768, 1, 448]> read_state_0 = read_state(input = self_attn_key_cache)[name = string("read_state_0")];
+            tensor<int32, [12]> tile_0 = const()[name = string("tile_0"), val = tensor<int32, [12]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(80399872)))];
+            int32 var_71_axis_0 = const()[name = string("op_71_axis_0"), val = int32(0)];
+            tensor<fp16, [1, 768, 1, 448]> var_71_cast_fp16_0, tensor<fp16, [1, 768, 1, 448]> var_71_cast_fp16_1, tensor<fp16, [1, 768, 1, 448]> var_71_cast_fp16_2, tensor<fp16, [1, 768, 1, 448]> var_71_cast_fp16_3, tensor<fp16, [1, 768, 1, 448]> var_71_cast_fp16_4, tensor<fp16, [1, 768, 1, 448]> var_71_cast_fp16_5, tensor<fp16, [1, 768, 1, 448]> var_71_cast_fp16_6, tensor<fp16, [1, 768, 1, 448]> var_71_cast_fp16_7, tensor<fp16, [1, 768, 1, 448]> var_71_cast_fp16_8, tensor<fp16, [1, 768, 1, 448]> var_71_cast_fp16_9, tensor<fp16, [1, 768, 1, 448]> var_71_cast_fp16_10, tensor<fp16, [1, 768, 1, 448]> var_71_cast_fp16_11 = split(axis = var_71_axis_0, split_sizes = tile_0, x = read_state_0)[name = string("op_71_cast_fp16")];
+            tensor<fp16, [12, 768, 1, 448]> read_state_1 = read_state(input = self_attn_value_cache)[name = string("read_state_1")];
+            tensor<int32, [12]> tile_1 = const()[name = string("tile_1"), val = tensor<int32, [12]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(80400000)))];
+            int32 var_86_axis_0 = const()[name = string("op_86_axis_0"), val = int32(0)];
+            tensor<fp16, [1, 768, 1, 448]> var_86_cast_fp16_0, tensor<fp16, [1, 768, 1, 448]> var_86_cast_fp16_1, tensor<fp16, [1, 768, 1, 448]> var_86_cast_fp16_2, tensor<fp16, [1, 768, 1, 448]> var_86_cast_fp16_3, tensor<fp16, [1, 768, 1, 448]> var_86_cast_fp16_4, tensor<fp16, [1, 768, 1, 448]> var_86_cast_fp16_5, tensor<fp16, [1, 768, 1, 448]> var_86_cast_fp16_6, tensor<fp16, [1, 768, 1, 448]> var_86_cast_fp16_7, tensor<fp16, [1, 768, 1, 448]> var_86_cast_fp16_8, tensor<fp16, [1, 768, 1, 448]> var_86_cast_fp16_9, tensor<fp16, [1, 768, 1, 448]> var_86_cast_fp16_10, tensor<fp16, [1, 768, 1, 448]> var_86_cast_fp16_11 = split(axis = var_86_axis_0, split_sizes = tile_1, x = read_state_1)[name = string("op_86_cast_fp16")];
+            tensor<fp16, [12, 768, 1, 1536]> read_state_2 = read_state(input = encoder_attn_key_cache)[name = string("read_state_2")];
+            tensor<int32, [4]> obj_17_begin_0 = const()[name = string("obj_17_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> obj_17_end_0 = const()[name = string("obj_17_end_0"), val = tensor<int32, [4]>([1, 768, 1, 1536])];
+            tensor<bool, [4]> obj_17_end_mask_0 = const()[name = string("obj_17_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<fp16, [1, 768, 1, 1536]> obj_17_cast_fp16 = slice_by_index(begin = obj_17_begin_0, end = obj_17_end_0, end_mask = obj_17_end_mask_0, x = read_state_2)[name = string("obj_17_cast_fp16")];
+            tensor<fp16, [12, 768, 1, 1536]> read_state_3 = read_state(input = encoder_attn_value_cache)[name = string("read_state_3")];
+            tensor<int32, [4]> obj_19_begin_0 = const()[name = string("obj_19_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> obj_19_end_0 = const()[name = string("obj_19_end_0"), val = tensor<int32, [4]>([1, 768, 1, 1536])];
+            tensor<bool, [4]> obj_19_end_mask_0 = const()[name = string("obj_19_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<fp16, [1, 768, 1, 1536]> obj_19_cast_fp16 = slice_by_index(begin = obj_19_begin_0, end = obj_19_end_0, end_mask = obj_19_end_mask_0, x = read_state_3)[name = string("obj_19_cast_fp16")];
+            int32 var_114 = const()[name = string("op_114"), val = int32(3)];
+            tensor<int32, [1]> out_1_axes_0 = const()[name = string("out_1_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_139_to_fp16 = const()[name = string("op_139_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> out_1_cast_fp16 = layer_norm(axes = out_1_axes_0, epsilon = var_139_to_fp16, x = inputs_1_cast_fp16)[name = string("out_1_cast_fp16")];
+            tensor<fp16, [768]> obj_5_mean_0_to_fp16 = const()[name = string("obj_5_mean_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(80400128)))];
+            tensor<fp16, [768]> obj_5_variance_0_to_fp16 = const()[name = string("obj_5_variance_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(80401728)))];
+            tensor<fp16, [768]> obj_5_gamma_0_to_fp16 = const()[name = string("obj_5_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(80403328)))];
+            tensor<fp16, [768]> obj_5_beta_0_to_fp16 = const()[name = string("obj_5_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(80404928)))];
+            fp16 obj_5_epsilon_0_to_fp16 = const()[name = string("obj_5_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> obj_5_cast_fp16 = batch_norm(beta = obj_5_beta_0_to_fp16, epsilon = obj_5_epsilon_0_to_fp16, gamma = obj_5_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_1_cast_fp16)[name = string("obj_5_cast_fp16")];
+            string var_161_pad_type_0 = const()[name = string("op_161_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_161_strides_0 = const()[name = string("op_161_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_161_pad_0 = const()[name = string("op_161_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_161_dilations_0 = const()[name = string("op_161_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_161_groups_0 = const()[name = string("op_161_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_0_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(80406528))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(80701504))))[name = string("layers_0_self_attn_q_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_0_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_0_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(80701632)))];
+            tensor<fp16, [1, 768, 1, 1]> var_161_cast_fp16 = conv(bias = layers_0_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_161_dilations_0, groups = var_161_groups_0, pad = var_161_pad_0, pad_type = var_161_pad_type_0, strides = var_161_strides_0, weight = layers_0_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_5_cast_fp16)[name = string("op_161_cast_fp16")];
+            string var_167_pad_type_0 = const()[name = string("op_167_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_167_strides_0 = const()[name = string("op_167_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_167_pad_0 = const()[name = string("op_167_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_167_dilations_0 = const()[name = string("op_167_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_167_groups_0 = const()[name = string("op_167_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_0_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(80717440))), nonzero_data = tensor<fp16, [7059]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(80703232))))[name = string("layers_0_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1]> var_167_cast_fp16 = conv(dilations = var_167_dilations_0, groups = var_167_groups_0, pad = var_167_pad_0, pad_type = var_167_pad_type_0, strides = var_167_strides_0, weight = layers_0_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_5_cast_fp16)[name = string("op_167_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> query_1_cast_fp16 = add(x = var_161_cast_fp16, y = var_167_cast_fp16)[name = string("query_1_cast_fp16")];
+            string var_176_pad_type_0 = const()[name = string("op_176_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_176_strides_0 = const()[name = string("op_176_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_176_pad_0 = const()[name = string("op_176_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_176_dilations_0 = const()[name = string("op_176_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_176_groups_0 = const()[name = string("op_176_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_0_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(80791232))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(81086208))))[name = string("layers_0_self_attn_k_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1, 768, 1, 1]> var_176_cast_fp16 = conv(dilations = var_176_dilations_0, groups = var_176_groups_0, pad = var_176_pad_0, pad_type = var_176_pad_type_0, strides = var_176_strides_0, weight = layers_0_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_5_cast_fp16)[name = string("op_176_cast_fp16")];
+            string var_182_pad_type_0 = const()[name = string("op_182_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_182_strides_0 = const()[name = string("op_182_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_182_pad_0 = const()[name = string("op_182_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_182_dilations_0 = const()[name = string("op_182_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_182_groups_0 = const()[name = string("op_182_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_0_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(81101248))), nonzero_data = tensor<fp16, [7418]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(81086336))))[name = string("layers_0_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1]> var_182_cast_fp16 = conv(dilations = var_182_dilations_0, groups = var_182_groups_0, pad = var_182_pad_0, pad_type = var_182_pad_type_0, strides = var_182_strides_0, weight = layers_0_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_5_cast_fp16)[name = string("op_182_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> current_key_1_cast_fp16 = add(x = var_176_cast_fp16, y = var_182_cast_fp16)[name = string("current_key_1_cast_fp16")];
+            string var_192_pad_type_0 = const()[name = string("op_192_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_192_strides_0 = const()[name = string("op_192_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_192_pad_0 = const()[name = string("op_192_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_192_dilations_0 = const()[name = string("op_192_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_192_groups_0 = const()[name = string("op_192_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_0_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(81175040))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(81470016))))[name = string("layers_0_self_attn_v_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_0_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_0_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(81470144)))];
+            tensor<fp16, [1, 768, 1, 1]> var_192_cast_fp16 = conv(bias = layers_0_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_192_dilations_0, groups = var_192_groups_0, pad = var_192_pad_0, pad_type = var_192_pad_type_0, strides = var_192_strides_0, weight = layers_0_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_5_cast_fp16)[name = string("op_192_cast_fp16")];
+            string var_198_pad_type_0 = const()[name = string("op_198_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_198_strides_0 = const()[name = string("op_198_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_198_pad_0 = const()[name = string("op_198_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_198_dilations_0 = const()[name = string("op_198_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_198_groups_0 = const()[name = string("op_198_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_0_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(81480256))), nonzero_data = tensor<fp16, [4201]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(81471744))))[name = string("layers_0_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1]> var_198_cast_fp16 = conv(dilations = var_198_dilations_0, groups = var_198_groups_0, pad = var_198_pad_0, pad_type = var_198_pad_type_0, strides = var_198_strides_0, weight = layers_0_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_5_cast_fp16)[name = string("op_198_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> current_value_1_cast_fp16 = add(x = var_192_cast_fp16, y = var_198_cast_fp16)[name = string("current_value_1_cast_fp16")];
+            tensor<int32, [1]> var_201_axes_0 = const()[name = string("op_201_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 448]> var_201_cast_fp16 = expand_dims(axes = var_201_axes_0, x = kv_cache_update_mask)[name = string("op_201_cast_fp16")];
+            tensor<int32, [1]> var_202_axes_0 = const()[name = string("op_202_axes_0"), val = tensor<int32, [1]>([2])];
+            tensor<fp16, [1, 1, 1, 448]> var_202_cast_fp16 = expand_dims(axes = var_202_axes_0, x = var_201_cast_fp16)[name = string("op_202_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 448]> var_204_cast_fp16 = mul(x = current_key_1_cast_fp16, y = var_202_cast_fp16)[name = string("op_204_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 448]> key_1_cast_fp16 = add(x = var_71_cast_fp16_0, y = var_204_cast_fp16)[name = string("key_1_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 448]> var_206_cast_fp16 = mul(x = current_value_1_cast_fp16, y = var_202_cast_fp16)[name = string("op_206_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 448]> value_1_cast_fp16 = add(x = var_86_cast_fp16_0, y = var_206_cast_fp16)[name = string("value_1_cast_fp16")];
+            tensor<int32, [4]> var_209 = const()[name = string("op_209"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1]> mh_q_1_cast_fp16 = reshape(shape = var_209, x = query_1_cast_fp16)[name = string("mh_q_1_cast_fp16")];
+            fp16 var_211_to_fp16 = const()[name = string("op_211_to_fp16"), val = fp16(0x1p-3)];
+            tensor<fp16, [1, 12, 64, 1]> var_212_cast_fp16 = mul(x = mh_q_1_cast_fp16, y = var_211_to_fp16)[name = string("op_212_cast_fp16")];
+            tensor<int32, [4]> var_213 = const()[name = string("op_213"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 448]> var_214_cast_fp16 = reshape(shape = var_213, x = key_1_cast_fp16)[name = string("op_214_cast_fp16")];
+            bool mh_w_1_transpose_x_0 = const()[name = string("mh_w_1_transpose_x_0"), val = bool(true)];
+            bool mh_w_1_transpose_y_0 = const()[name = string("mh_w_1_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 12, 1, 448]> mh_w_1_cast_fp16 = matmul(transpose_x = mh_w_1_transpose_x_0, transpose_y = mh_w_1_transpose_y_0, x = var_212_cast_fp16, y = var_214_cast_fp16)[name = string("mh_w_1_cast_fp16")];
+            tensor<int32, [1]> var_218_axes_0 = const()[name = string("op_218_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 448]> var_218_cast_fp16 = expand_dims(axes = var_218_axes_0, x = decoder_key_padding_mask)[name = string("op_218_cast_fp16")];
+            tensor<int32, [1]> var_219_axes_0 = const()[name = string("op_219_axes_0"), val = tensor<int32, [1]>([2])];
+            tensor<fp16, [1, 1, 1, 448]> var_219_cast_fp16 = expand_dims(axes = var_219_axes_0, x = var_218_cast_fp16)[name = string("op_219_cast_fp16")];
+            tensor<fp16, [1, 12, 1, 448]> mh_w_3_cast_fp16 = add(x = mh_w_1_cast_fp16, y = var_219_cast_fp16)[name = string("mh_w_3_cast_fp16")];
+            tensor<fp16, [1, 12, 1, 448]> var_222_cast_fp16 = softmax(axis = var_114, x = mh_w_3_cast_fp16)[name = string("op_222_cast_fp16")];
+            tensor<int32, [4]> var_223 = const()[name = string("op_223"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 448]> var_224_cast_fp16 = reshape(shape = var_223, x = value_1_cast_fp16)[name = string("op_224_cast_fp16")];
+            bool attn_1_transpose_x_0 = const()[name = string("attn_1_transpose_x_0"), val = bool(false)];
+            bool attn_1_transpose_y_0 = const()[name = string("attn_1_transpose_y_0"), val = bool(true)];
+            tensor<fp16, [1, 12, 64, 1]> attn_1_cast_fp16 = matmul(transpose_x = attn_1_transpose_x_0, transpose_y = attn_1_transpose_y_0, x = var_224_cast_fp16, y = var_222_cast_fp16)[name = string("attn_1_cast_fp16")];
+            tensor<int32, [4]> var_227 = const()[name = string("op_227"), val = tensor<int32, [4]>([1, 768, 1, -1])];
+            tensor<fp16, [1, 768, 1, 1]> input_1_cast_fp16 = reshape(shape = var_227, x = attn_1_cast_fp16)[name = string("input_1_cast_fp16")];
+            string var_237_pad_type_0 = const()[name = string("op_237_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_237_strides_0 = const()[name = string("op_237_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_237_pad_0 = const()[name = string("op_237_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_237_dilations_0 = const()[name = string("op_237_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_237_groups_0 = const()[name = string("op_237_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_0_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(81554048))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(81849024))))[name = string("layers_0_self_attn_o_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_0_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_0_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(81849152)))];
+            tensor<fp16, [1, 768, 1, 1]> var_237_cast_fp16 = conv(bias = layers_0_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_237_dilations_0, groups = var_237_groups_0, pad = var_237_pad_0, pad_type = var_237_pad_type_0, strides = var_237_strides_0, weight = layers_0_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_1_cast_fp16)[name = string("op_237_cast_fp16")];
+            string var_243_pad_type_0 = const()[name = string("op_243_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_243_strides_0 = const()[name = string("op_243_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_243_pad_0 = const()[name = string("op_243_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_243_dilations_0 = const()[name = string("op_243_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_243_groups_0 = const()[name = string("op_243_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_0_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(81860992))), nonzero_data = tensor<fp16, [5067]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(81850752))))[name = string("layers_0_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1]> var_243_cast_fp16 = conv(dilations = var_243_dilations_0, groups = var_243_groups_0, pad = var_243_pad_0, pad_type = var_243_pad_type_0, strides = var_243_strides_0, weight = layers_0_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_1_cast_fp16)[name = string("op_243_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> obj_11_cast_fp16 = add(x = var_237_cast_fp16, y = var_243_cast_fp16)[name = string("obj_11_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> inputs_3_cast_fp16 = add(x = inputs_1_cast_fp16, y = obj_11_cast_fp16)[name = string("inputs_3_cast_fp16")];
+            tensor<int32, [1]> out_3_axes_0 = const()[name = string("out_3_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_258_to_fp16 = const()[name = string("op_258_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> out_3_cast_fp16 = layer_norm(axes = out_3_axes_0, epsilon = var_258_to_fp16, x = inputs_3_cast_fp16)[name = string("out_3_cast_fp16")];
+            tensor<fp16, [768]> obj_13_gamma_0_to_fp16 = const()[name = string("obj_13_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(81934784)))];
+            tensor<fp16, [768]> obj_13_beta_0_to_fp16 = const()[name = string("obj_13_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(81936384)))];
+            fp16 obj_13_epsilon_0_to_fp16 = const()[name = string("obj_13_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> obj_13_cast_fp16 = batch_norm(beta = obj_13_beta_0_to_fp16, epsilon = obj_13_epsilon_0_to_fp16, gamma = obj_13_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_3_cast_fp16)[name = string("obj_13_cast_fp16")];
+            string var_278_pad_type_0 = const()[name = string("op_278_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_278_strides_0 = const()[name = string("op_278_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_278_pad_0 = const()[name = string("op_278_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_278_dilations_0 = const()[name = string("op_278_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_278_groups_0 = const()[name = string("op_278_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_0_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(81937984))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(82232960))))[name = string("layers_0_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_0_encoder_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_0_encoder_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(82233088)))];
+            tensor<fp16, [1, 768, 1, 1]> var_278_cast_fp16 = conv(bias = layers_0_encoder_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_278_dilations_0, groups = var_278_groups_0, pad = var_278_pad_0, pad_type = var_278_pad_type_0, strides = var_278_strides_0, weight = layers_0_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_13_cast_fp16)[name = string("op_278_cast_fp16")];
+            string var_284_pad_type_0 = const()[name = string("op_284_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_284_strides_0 = const()[name = string("op_284_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_284_pad_0 = const()[name = string("op_284_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_284_dilations_0 = const()[name = string("op_284_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_284_groups_0 = const()[name = string("op_284_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_0_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(82258496))), nonzero_data = tensor<fp16, [11855]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(82234688))))[name = string("layers_0_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1]> var_284_cast_fp16 = conv(dilations = var_284_dilations_0, groups = var_284_groups_0, pad = var_284_pad_0, pad_type = var_284_pad_type_0, strides = var_284_strides_0, weight = layers_0_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_13_cast_fp16)[name = string("op_284_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> query_3_cast_fp16 = add(x = var_278_cast_fp16, y = var_284_cast_fp16)[name = string("query_3_cast_fp16")];
+            tensor<int32, [4]> var_287 = const()[name = string("op_287"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1]> mh_q_3_cast_fp16 = reshape(shape = var_287, x = query_3_cast_fp16)[name = string("mh_q_3_cast_fp16")];
+            fp16 var_289_to_fp16 = const()[name = string("op_289_to_fp16"), val = fp16(0x1p-3)];
+            tensor<fp16, [1, 12, 64, 1]> var_290_cast_fp16 = mul(x = mh_q_3_cast_fp16, y = var_289_to_fp16)[name = string("op_290_cast_fp16")];
+            tensor<int32, [4]> var_291 = const()[name = string("op_291"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1536]> var_292_cast_fp16 = reshape(shape = var_291, x = obj_17_cast_fp16)[name = string("op_292_cast_fp16")];
+            bool mh_w_5_transpose_x_0 = const()[name = string("mh_w_5_transpose_x_0"), val = bool(true)];
+            bool mh_w_5_transpose_y_0 = const()[name = string("mh_w_5_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 12, 1, 1536]> mh_w_5_cast_fp16 = matmul(transpose_x = mh_w_5_transpose_x_0, transpose_y = mh_w_5_transpose_y_0, x = var_290_cast_fp16, y = var_292_cast_fp16)[name = string("mh_w_5_cast_fp16")];
+            tensor<fp16, [1, 1536]> read_state_4 = read_state(input = encoder_attn_key_padding_mask)[name = string("read_state_4")];
+            tensor<int32, [1]> var_296_axes_0 = const()[name = string("op_296_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1536]> var_296_cast_fp16 = expand_dims(axes = var_296_axes_0, x = read_state_4)[name = string("op_296_cast_fp16")];
+            tensor<int32, [1]> var_297_axes_0 = const()[name = string("op_297_axes_0"), val = tensor<int32, [1]>([2])];
+            tensor<fp16, [1, 1, 1, 1536]> var_297_cast_fp16 = expand_dims(axes = var_297_axes_0, x = var_296_cast_fp16)[name = string("op_297_cast_fp16")];
+            tensor<fp16, [1, 12, 1, 1536]> mh_w_7_cast_fp16 = add(x = mh_w_5_cast_fp16, y = var_297_cast_fp16)[name = string("mh_w_7_cast_fp16")];
+            tensor<fp16, [1, 12, 1, 1536]> obj_23_cast_fp16 = softmax(axis = var_114, x = mh_w_7_cast_fp16)[name = string("obj_23_cast_fp16")];
+            tensor<int32, [4]> var_301 = const()[name = string("op_301"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1536]> var_302_cast_fp16 = reshape(shape = var_301, x = obj_19_cast_fp16)[name = string("op_302_cast_fp16")];
+            bool attn_3_transpose_x_0 = const()[name = string("attn_3_transpose_x_0"), val = bool(false)];
+            bool attn_3_transpose_y_0 = const()[name = string("attn_3_transpose_y_0"), val = bool(true)];
+            tensor<fp16, [1, 12, 64, 1]> attn_3_cast_fp16 = matmul(transpose_x = attn_3_transpose_x_0, transpose_y = attn_3_transpose_y_0, x = var_302_cast_fp16, y = obj_23_cast_fp16)[name = string("attn_3_cast_fp16")];
+            tensor<int32, [4]> var_305 = const()[name = string("op_305"), val = tensor<int32, [4]>([1, 768, 1, -1])];
+            tensor<fp16, [1, 768, 1, 1]> input_3_cast_fp16 = reshape(shape = var_305, x = attn_3_cast_fp16)[name = string("input_3_cast_fp16")];
+            string var_315_pad_type_0 = const()[name = string("op_315_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_315_strides_0 = const()[name = string("op_315_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_315_pad_0 = const()[name = string("op_315_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_315_dilations_0 = const()[name = string("op_315_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_315_groups_0 = const()[name = string("op_315_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_0_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(82332288))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(82627264))))[name = string("layers_0_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_0_encoder_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_0_encoder_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(82627392)))];
+            tensor<fp16, [1, 768, 1, 1]> var_315_cast_fp16 = conv(bias = layers_0_encoder_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_315_dilations_0, groups = var_315_groups_0, pad = var_315_pad_0, pad_type = var_315_pad_type_0, strides = var_315_strides_0, weight = layers_0_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_3_cast_fp16)[name = string("op_315_cast_fp16")];
+            string var_321_pad_type_0 = const()[name = string("op_321_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_321_strides_0 = const()[name = string("op_321_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_321_pad_0 = const()[name = string("op_321_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_321_dilations_0 = const()[name = string("op_321_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_321_groups_0 = const()[name = string("op_321_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_0_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(82636736))), nonzero_data = tensor<fp16, [3825]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(82628992))))[name = string("layers_0_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1]> var_321_cast_fp16 = conv(dilations = var_321_dilations_0, groups = var_321_groups_0, pad = var_321_pad_0, pad_type = var_321_pad_type_0, strides = var_321_strides_0, weight = layers_0_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_3_cast_fp16)[name = string("op_321_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> obj_21_cast_fp16 = add(x = var_315_cast_fp16, y = var_321_cast_fp16)[name = string("obj_21_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> inputs_5_cast_fp16 = add(x = inputs_3_cast_fp16, y = obj_21_cast_fp16)[name = string("inputs_5_cast_fp16")];
+            tensor<int32, [1]> out_5_axes_0 = const()[name = string("out_5_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_332_to_fp16 = const()[name = string("op_332_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> out_5_cast_fp16 = layer_norm(axes = out_5_axes_0, epsilon = var_332_to_fp16, x = inputs_5_cast_fp16)[name = string("out_5_cast_fp16")];
+            tensor<fp16, [768]> input_5_gamma_0_to_fp16 = const()[name = string("input_5_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(82710528)))];
+            tensor<fp16, [768]> input_5_beta_0_to_fp16 = const()[name = string("input_5_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(82712128)))];
+            fp16 input_5_epsilon_0_to_fp16 = const()[name = string("input_5_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> input_5_cast_fp16 = batch_norm(beta = input_5_beta_0_to_fp16, epsilon = input_5_epsilon_0_to_fp16, gamma = input_5_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_5_cast_fp16)[name = string("input_5_cast_fp16")];
+            string var_350_pad_type_0 = const()[name = string("op_350_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_350_strides_0 = const()[name = string("op_350_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_350_pad_0 = const()[name = string("op_350_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_350_dilations_0 = const()[name = string("op_350_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_350_groups_0 = const()[name = string("op_350_groups_0"), val = int32(1)];
+            tensor<fp16, [3072, 768, 1, 1]> layers_0_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [3072, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(82713728))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(83893440))))[name = string("layers_0_fc1_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [3072]> layers_0_fc1_inlier_module_bias_to_fp16 = const()[name = string("layers_0_fc1_inlier_module_bias_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(83893568)))];
+            tensor<fp16, [1, 3072, 1, 1]> var_350_cast_fp16 = conv(bias = layers_0_fc1_inlier_module_bias_to_fp16, dilations = var_350_dilations_0, groups = var_350_groups_0, pad = var_350_pad_0, pad_type = var_350_pad_type_0, strides = var_350_strides_0, weight = layers_0_fc1_inlier_module_weight_to_fp16_palettized, x = input_5_cast_fp16)[name = string("op_350_cast_fp16")];
+            string var_356_pad_type_0 = const()[name = string("op_356_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_356_strides_0 = const()[name = string("op_356_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_356_pad_0 = const()[name = string("op_356_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_356_dilations_0 = const()[name = string("op_356_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_356_groups_0 = const()[name = string("op_356_groups_0"), val = int32(1)];
+            tensor<fp16, [3072, 768, 1, 1]> layers_0_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [3072, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(83956160))), nonzero_data = tensor<fp16, [28153]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(83899776))))[name = string("layers_0_fc1_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 3072, 1, 1]> var_356_cast_fp16 = conv(dilations = var_356_dilations_0, groups = var_356_groups_0, pad = var_356_pad_0, pad_type = var_356_pad_type_0, strides = var_356_strides_0, weight = layers_0_fc1_outlier_module_weight_to_fp16_sparsified, x = input_5_cast_fp16)[name = string("op_356_cast_fp16")];
+            tensor<fp16, [1, 3072, 1, 1]> input_7_cast_fp16 = add(x = var_350_cast_fp16, y = var_356_cast_fp16)[name = string("input_7_cast_fp16")];
+            string input_9_mode_0 = const()[name = string("input_9_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 3072, 1, 1]> input_9_cast_fp16 = gelu(mode = input_9_mode_0, x = input_7_cast_fp16)[name = string("input_9_cast_fp16")];
+            string var_367_pad_type_0 = const()[name = string("op_367_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_367_strides_0 = const()[name = string("op_367_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_367_pad_0 = const()[name = string("op_367_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_367_dilations_0 = const()[name = string("op_367_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_367_groups_0 = const()[name = string("op_367_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 3072, 1, 1]> layers_0_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(84251136))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(85430848))))[name = string("layers_0_fc2_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_0_fc2_inlier_module_bias_to_fp16 = const()[name = string("layers_0_fc2_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(85430976)))];
+            tensor<fp16, [1, 768, 1, 1]> var_367_cast_fp16 = conv(bias = layers_0_fc2_inlier_module_bias_to_fp16, dilations = var_367_dilations_0, groups = var_367_groups_0, pad = var_367_pad_0, pad_type = var_367_pad_type_0, strides = var_367_strides_0, weight = layers_0_fc2_inlier_module_weight_to_fp16_palettized, x = input_9_cast_fp16)[name = string("op_367_cast_fp16")];
+            string var_373_pad_type_0 = const()[name = string("op_373_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_373_strides_0 = const()[name = string("op_373_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_373_pad_0 = const()[name = string("op_373_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_373_dilations_0 = const()[name = string("op_373_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_373_groups_0 = const()[name = string("op_373_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 3072, 1, 1]> layers_0_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(85482112))), nonzero_data = tensor<fp16, [24719]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(85432576))))[name = string("layers_0_fc2_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1]> var_373_cast_fp16 = conv(dilations = var_373_dilations_0, groups = var_373_groups_0, pad = var_373_pad_0, pad_type = var_373_pad_type_0, strides = var_373_strides_0, weight = layers_0_fc2_outlier_module_weight_to_fp16_sparsified, x = input_9_cast_fp16)[name = string("op_373_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> hidden_states_3_cast_fp16 = add(x = var_367_cast_fp16, y = var_373_cast_fp16)[name = string("hidden_states_3_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> inputs_7_cast_fp16 = add(x = inputs_5_cast_fp16, y = hidden_states_3_cast_fp16)[name = string("inputs_7_cast_fp16")];
+            tensor<int32, [4]> obj_35_begin_0 = const()[name = string("obj_35_begin_0"), val = tensor<int32, [4]>([1, 0, 0, 0])];
+            tensor<int32, [4]> obj_35_end_0 = const()[name = string("obj_35_end_0"), val = tensor<int32, [4]>([2, 768, 1, 1536])];
+            tensor<bool, [4]> obj_35_end_mask_0 = const()[name = string("obj_35_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<fp16, [1, 768, 1, 1536]> obj_35_cast_fp16 = slice_by_index(begin = obj_35_begin_0, end = obj_35_end_0, end_mask = obj_35_end_mask_0, x = read_state_2)[name = string("obj_35_cast_fp16")];
+            tensor<int32, [4]> obj_37_begin_0 = const()[name = string("obj_37_begin_0"), val = tensor<int32, [4]>([1, 0, 0, 0])];
+            tensor<int32, [4]> obj_37_end_0 = const()[name = string("obj_37_end_0"), val = tensor<int32, [4]>([2, 768, 1, 1536])];
+            tensor<bool, [4]> obj_37_end_mask_0 = const()[name = string("obj_37_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<fp16, [1, 768, 1, 1536]> obj_37_cast_fp16 = slice_by_index(begin = obj_37_begin_0, end = obj_37_end_0, end_mask = obj_37_end_mask_0, x = read_state_3)[name = string("obj_37_cast_fp16")];
+            int32 var_395 = const()[name = string("op_395"), val = int32(3)];
+            tensor<int32, [1]> out_7_axes_0 = const()[name = string("out_7_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_420_to_fp16 = const()[name = string("op_420_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> out_7_cast_fp16 = layer_norm(axes = out_7_axes_0, epsilon = var_420_to_fp16, x = inputs_7_cast_fp16)[name = string("out_7_cast_fp16")];
+            tensor<fp16, [768]> obj_25_gamma_0_to_fp16 = const()[name = string("obj_25_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(85777088)))];
+            tensor<fp16, [768]> obj_25_beta_0_to_fp16 = const()[name = string("obj_25_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(85778688)))];
+            fp16 obj_25_epsilon_0_to_fp16 = const()[name = string("obj_25_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> obj_25_cast_fp16 = batch_norm(beta = obj_25_beta_0_to_fp16, epsilon = obj_25_epsilon_0_to_fp16, gamma = obj_25_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_7_cast_fp16)[name = string("obj_25_cast_fp16")];
+            string var_442_pad_type_0 = const()[name = string("op_442_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_442_strides_0 = const()[name = string("op_442_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_442_pad_0 = const()[name = string("op_442_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_442_dilations_0 = const()[name = string("op_442_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_442_groups_0 = const()[name = string("op_442_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_1_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(85780288))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(86075264))))[name = string("layers_1_self_attn_q_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_1_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_1_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(86075392)))];
+            tensor<fp16, [1, 768, 1, 1]> var_442_cast_fp16 = conv(bias = layers_1_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_442_dilations_0, groups = var_442_groups_0, pad = var_442_pad_0, pad_type = var_442_pad_type_0, strides = var_442_strides_0, weight = layers_1_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_25_cast_fp16)[name = string("op_442_cast_fp16")];
+            string var_448_pad_type_0 = const()[name = string("op_448_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_448_strides_0 = const()[name = string("op_448_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_448_pad_0 = const()[name = string("op_448_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_448_dilations_0 = const()[name = string("op_448_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_448_groups_0 = const()[name = string("op_448_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_1_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(86097920))), nonzero_data = tensor<fp16, [10431]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(86076992))))[name = string("layers_1_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1]> var_448_cast_fp16 = conv(dilations = var_448_dilations_0, groups = var_448_groups_0, pad = var_448_pad_0, pad_type = var_448_pad_type_0, strides = var_448_strides_0, weight = layers_1_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_25_cast_fp16)[name = string("op_448_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> query_5_cast_fp16 = add(x = var_442_cast_fp16, y = var_448_cast_fp16)[name = string("query_5_cast_fp16")];
+            string var_457_pad_type_0 = const()[name = string("op_457_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_457_strides_0 = const()[name = string("op_457_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_457_pad_0 = const()[name = string("op_457_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_457_dilations_0 = const()[name = string("op_457_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_457_groups_0 = const()[name = string("op_457_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_1_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(86171712))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(86466688))))[name = string("layers_1_self_attn_k_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1, 768, 1, 1]> var_457_cast_fp16 = conv(dilations = var_457_dilations_0, groups = var_457_groups_0, pad = var_457_pad_0, pad_type = var_457_pad_type_0, strides = var_457_strides_0, weight = layers_1_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_25_cast_fp16)[name = string("op_457_cast_fp16")];
+            string var_463_pad_type_0 = const()[name = string("op_463_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_463_strides_0 = const()[name = string("op_463_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_463_pad_0 = const()[name = string("op_463_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_463_dilations_0 = const()[name = string("op_463_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_463_groups_0 = const()[name = string("op_463_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_1_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(86483968))), nonzero_data = tensor<fp16, [8539]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(86466816))))[name = string("layers_1_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1]> var_463_cast_fp16 = conv(dilations = var_463_dilations_0, groups = var_463_groups_0, pad = var_463_pad_0, pad_type = var_463_pad_type_0, strides = var_463_strides_0, weight = layers_1_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_25_cast_fp16)[name = string("op_463_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> current_key_3_cast_fp16 = add(x = var_457_cast_fp16, y = var_463_cast_fp16)[name = string("current_key_3_cast_fp16")];
+            string var_473_pad_type_0 = const()[name = string("op_473_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_473_strides_0 = const()[name = string("op_473_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_473_pad_0 = const()[name = string("op_473_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_473_dilations_0 = const()[name = string("op_473_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_473_groups_0 = const()[name = string("op_473_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_1_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(86557760))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(86852736))))[name = string("layers_1_self_attn_v_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_1_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_1_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(86852864)))];
+            tensor<fp16, [1, 768, 1, 1]> var_473_cast_fp16 = conv(bias = layers_1_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_473_dilations_0, groups = var_473_groups_0, pad = var_473_pad_0, pad_type = var_473_pad_type_0, strides = var_473_strides_0, weight = layers_1_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_25_cast_fp16)[name = string("op_473_cast_fp16")];
+            string var_479_pad_type_0 = const()[name = string("op_479_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_479_strides_0 = const()[name = string("op_479_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_479_pad_0 = const()[name = string("op_479_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_479_dilations_0 = const()[name = string("op_479_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_479_groups_0 = const()[name = string("op_479_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_1_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(86878080))), nonzero_data = tensor<fp16, [11750]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(86854464))))[name = string("layers_1_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1]> var_479_cast_fp16 = conv(dilations = var_479_dilations_0, groups = var_479_groups_0, pad = var_479_pad_0, pad_type = var_479_pad_type_0, strides = var_479_strides_0, weight = layers_1_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_25_cast_fp16)[name = string("op_479_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> current_value_3_cast_fp16 = add(x = var_473_cast_fp16, y = var_479_cast_fp16)[name = string("current_value_3_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 448]> var_485_cast_fp16 = mul(x = current_key_3_cast_fp16, y = var_202_cast_fp16)[name = string("op_485_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 448]> key_3_cast_fp16 = add(x = var_71_cast_fp16_1, y = var_485_cast_fp16)[name = string("key_3_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 448]> var_487_cast_fp16 = mul(x = current_value_3_cast_fp16, y = var_202_cast_fp16)[name = string("op_487_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 448]> value_3_cast_fp16 = add(x = var_86_cast_fp16_1, y = var_487_cast_fp16)[name = string("value_3_cast_fp16")];
+            tensor<int32, [4]> var_490 = const()[name = string("op_490"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1]> mh_q_5_cast_fp16 = reshape(shape = var_490, x = query_5_cast_fp16)[name = string("mh_q_5_cast_fp16")];
+            fp16 var_492_to_fp16 = const()[name = string("op_492_to_fp16"), val = fp16(0x1p-3)];
+            tensor<fp16, [1, 12, 64, 1]> var_493_cast_fp16 = mul(x = mh_q_5_cast_fp16, y = var_492_to_fp16)[name = string("op_493_cast_fp16")];
+            tensor<int32, [4]> var_494 = const()[name = string("op_494"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 448]> var_495_cast_fp16 = reshape(shape = var_494, x = key_3_cast_fp16)[name = string("op_495_cast_fp16")];
+            bool mh_w_9_transpose_x_0 = const()[name = string("mh_w_9_transpose_x_0"), val = bool(true)];
+            bool mh_w_9_transpose_y_0 = const()[name = string("mh_w_9_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 12, 1, 448]> mh_w_9_cast_fp16 = matmul(transpose_x = mh_w_9_transpose_x_0, transpose_y = mh_w_9_transpose_y_0, x = var_493_cast_fp16, y = var_495_cast_fp16)[name = string("mh_w_9_cast_fp16")];
+            tensor<fp16, [1, 12, 1, 448]> mh_w_11_cast_fp16 = add(x = mh_w_9_cast_fp16, y = var_219_cast_fp16)[name = string("mh_w_11_cast_fp16")];
+            tensor<fp16, [1, 12, 1, 448]> var_503_cast_fp16 = softmax(axis = var_395, x = mh_w_11_cast_fp16)[name = string("op_503_cast_fp16")];
+            tensor<int32, [4]> var_504 = const()[name = string("op_504"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 448]> var_505_cast_fp16 = reshape(shape = var_504, x = value_3_cast_fp16)[name = string("op_505_cast_fp16")];
+            bool attn_5_transpose_x_0 = const()[name = string("attn_5_transpose_x_0"), val = bool(false)];
+            bool attn_5_transpose_y_0 = const()[name = string("attn_5_transpose_y_0"), val = bool(true)];
+            tensor<fp16, [1, 12, 64, 1]> attn_5_cast_fp16 = matmul(transpose_x = attn_5_transpose_x_0, transpose_y = attn_5_transpose_y_0, x = var_505_cast_fp16, y = var_503_cast_fp16)[name = string("attn_5_cast_fp16")];
+            tensor<int32, [4]> var_508 = const()[name = string("op_508"), val = tensor<int32, [4]>([1, 768, 1, -1])];
+            tensor<fp16, [1, 768, 1, 1]> input_11_cast_fp16 = reshape(shape = var_508, x = attn_5_cast_fp16)[name = string("input_11_cast_fp16")];
+            string var_518_pad_type_0 = const()[name = string("op_518_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_518_strides_0 = const()[name = string("op_518_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_518_pad_0 = const()[name = string("op_518_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_518_dilations_0 = const()[name = string("op_518_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_518_groups_0 = const()[name = string("op_518_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_1_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(86951872))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(87246848))))[name = string("layers_1_self_attn_o_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_1_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_1_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(87246976)))];
+            tensor<fp16, [1, 768, 1, 1]> var_518_cast_fp16 = conv(bias = layers_1_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_518_dilations_0, groups = var_518_groups_0, pad = var_518_pad_0, pad_type = var_518_pad_type_0, strides = var_518_strides_0, weight = layers_1_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_11_cast_fp16)[name = string("op_518_cast_fp16")];
+            string var_524_pad_type_0 = const()[name = string("op_524_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_524_strides_0 = const()[name = string("op_524_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_524_pad_0 = const()[name = string("op_524_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_524_dilations_0 = const()[name = string("op_524_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_524_groups_0 = const()[name = string("op_524_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_1_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(87270528))), nonzero_data = tensor<fp16, [10924]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(87248576))))[name = string("layers_1_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1]> var_524_cast_fp16 = conv(dilations = var_524_dilations_0, groups = var_524_groups_0, pad = var_524_pad_0, pad_type = var_524_pad_type_0, strides = var_524_strides_0, weight = layers_1_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_11_cast_fp16)[name = string("op_524_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> obj_31_cast_fp16 = add(x = var_518_cast_fp16, y = var_524_cast_fp16)[name = string("obj_31_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> inputs_9_cast_fp16 = add(x = inputs_7_cast_fp16, y = obj_31_cast_fp16)[name = string("inputs_9_cast_fp16")];
+            tensor<int32, [1]> out_9_axes_0 = const()[name = string("out_9_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_539_to_fp16 = const()[name = string("op_539_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> out_9_cast_fp16 = layer_norm(axes = out_9_axes_0, epsilon = var_539_to_fp16, x = inputs_9_cast_fp16)[name = string("out_9_cast_fp16")];
+            tensor<fp16, [768]> obj_33_gamma_0_to_fp16 = const()[name = string("obj_33_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(87344320)))];
+            tensor<fp16, [768]> obj_33_beta_0_to_fp16 = const()[name = string("obj_33_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(87345920)))];
+            fp16 obj_33_epsilon_0_to_fp16 = const()[name = string("obj_33_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> obj_33_cast_fp16 = batch_norm(beta = obj_33_beta_0_to_fp16, epsilon = obj_33_epsilon_0_to_fp16, gamma = obj_33_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_9_cast_fp16)[name = string("obj_33_cast_fp16")];
+            string var_559_pad_type_0 = const()[name = string("op_559_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_559_strides_0 = const()[name = string("op_559_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_559_pad_0 = const()[name = string("op_559_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_559_dilations_0 = const()[name = string("op_559_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_559_groups_0 = const()[name = string("op_559_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_1_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(87347520))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(87642496))))[name = string("layers_1_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_1_encoder_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_1_encoder_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(87642624)))];
+            tensor<fp16, [1, 768, 1, 1]> var_559_cast_fp16 = conv(bias = layers_1_encoder_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_559_dilations_0, groups = var_559_groups_0, pad = var_559_pad_0, pad_type = var_559_pad_type_0, strides = var_559_strides_0, weight = layers_1_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_33_cast_fp16)[name = string("op_559_cast_fp16")];
+            string var_565_pad_type_0 = const()[name = string("op_565_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_565_strides_0 = const()[name = string("op_565_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_565_pad_0 = const()[name = string("op_565_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_565_dilations_0 = const()[name = string("op_565_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_565_groups_0 = const()[name = string("op_565_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_1_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(87655552))), nonzero_data = tensor<fp16, [5612]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(87644224))))[name = string("layers_1_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1]> var_565_cast_fp16 = conv(dilations = var_565_dilations_0, groups = var_565_groups_0, pad = var_565_pad_0, pad_type = var_565_pad_type_0, strides = var_565_strides_0, weight = layers_1_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_33_cast_fp16)[name = string("op_565_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> query_7_cast_fp16 = add(x = var_559_cast_fp16, y = var_565_cast_fp16)[name = string("query_7_cast_fp16")];
+            tensor<int32, [4]> var_568 = const()[name = string("op_568"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1]> mh_q_7_cast_fp16 = reshape(shape = var_568, x = query_7_cast_fp16)[name = string("mh_q_7_cast_fp16")];
+            fp16 var_570_to_fp16 = const()[name = string("op_570_to_fp16"), val = fp16(0x1p-3)];
+            tensor<fp16, [1, 12, 64, 1]> var_571_cast_fp16 = mul(x = mh_q_7_cast_fp16, y = var_570_to_fp16)[name = string("op_571_cast_fp16")];
+            tensor<int32, [4]> var_572 = const()[name = string("op_572"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1536]> var_573_cast_fp16 = reshape(shape = var_572, x = obj_35_cast_fp16)[name = string("op_573_cast_fp16")];
+            bool mh_w_13_transpose_x_0 = const()[name = string("mh_w_13_transpose_x_0"), val = bool(true)];
+            bool mh_w_13_transpose_y_0 = const()[name = string("mh_w_13_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 12, 1, 1536]> mh_w_13_cast_fp16 = matmul(transpose_x = mh_w_13_transpose_x_0, transpose_y = mh_w_13_transpose_y_0, x = var_571_cast_fp16, y = var_573_cast_fp16)[name = string("mh_w_13_cast_fp16")];
+            tensor<fp16, [1, 12, 1, 1536]> mh_w_15_cast_fp16 = add(x = mh_w_13_cast_fp16, y = var_297_cast_fp16)[name = string("mh_w_15_cast_fp16")];
+            tensor<fp16, [1, 12, 1, 1536]> obj_41_cast_fp16 = softmax(axis = var_395, x = mh_w_15_cast_fp16)[name = string("obj_41_cast_fp16")];
+            tensor<int32, [4]> var_582 = const()[name = string("op_582"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1536]> var_583_cast_fp16 = reshape(shape = var_582, x = obj_37_cast_fp16)[name = string("op_583_cast_fp16")];
+            bool attn_7_transpose_x_0 = const()[name = string("attn_7_transpose_x_0"), val = bool(false)];
+            bool attn_7_transpose_y_0 = const()[name = string("attn_7_transpose_y_0"), val = bool(true)];
+            tensor<fp16, [1, 12, 64, 1]> attn_7_cast_fp16 = matmul(transpose_x = attn_7_transpose_x_0, transpose_y = attn_7_transpose_y_0, x = var_583_cast_fp16, y = obj_41_cast_fp16)[name = string("attn_7_cast_fp16")];
+            tensor<int32, [4]> var_586 = const()[name = string("op_586"), val = tensor<int32, [4]>([1, 768, 1, -1])];
+            tensor<fp16, [1, 768, 1, 1]> input_13_cast_fp16 = reshape(shape = var_586, x = attn_7_cast_fp16)[name = string("input_13_cast_fp16")];
+            string var_596_pad_type_0 = const()[name = string("op_596_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_596_strides_0 = const()[name = string("op_596_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_596_pad_0 = const()[name = string("op_596_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_596_dilations_0 = const()[name = string("op_596_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_596_groups_0 = const()[name = string("op_596_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_1_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(87729344))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(88024320))))[name = string("layers_1_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_1_encoder_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_1_encoder_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(88024448)))];
+            tensor<fp16, [1, 768, 1, 1]> var_596_cast_fp16 = conv(bias = layers_1_encoder_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_596_dilations_0, groups = var_596_groups_0, pad = var_596_pad_0, pad_type = var_596_pad_type_0, strides = var_596_strides_0, weight = layers_1_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_13_cast_fp16)[name = string("op_596_cast_fp16")];
+            string var_602_pad_type_0 = const()[name = string("op_602_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_602_strides_0 = const()[name = string("op_602_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_602_pad_0 = const()[name = string("op_602_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_602_dilations_0 = const()[name = string("op_602_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_602_groups_0 = const()[name = string("op_602_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_1_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(88032448))), nonzero_data = tensor<fp16, [3164]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(88026048))))[name = string("layers_1_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1]> var_602_cast_fp16 = conv(dilations = var_602_dilations_0, groups = var_602_groups_0, pad = var_602_pad_0, pad_type = var_602_pad_type_0, strides = var_602_strides_0, weight = layers_1_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_13_cast_fp16)[name = string("op_602_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> obj_39_cast_fp16 = add(x = var_596_cast_fp16, y = var_602_cast_fp16)[name = string("obj_39_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> inputs_11_cast_fp16 = add(x = inputs_9_cast_fp16, y = obj_39_cast_fp16)[name = string("inputs_11_cast_fp16")];
+            tensor<int32, [1]> out_11_axes_0 = const()[name = string("out_11_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_613_to_fp16 = const()[name = string("op_613_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> out_11_cast_fp16 = layer_norm(axes = out_11_axes_0, epsilon = var_613_to_fp16, x = inputs_11_cast_fp16)[name = string("out_11_cast_fp16")];
+            tensor<fp16, [768]> input_15_gamma_0_to_fp16 = const()[name = string("input_15_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(88106240)))];
+            tensor<fp16, [768]> input_15_beta_0_to_fp16 = const()[name = string("input_15_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(88107840)))];
+            fp16 input_15_epsilon_0_to_fp16 = const()[name = string("input_15_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> input_15_cast_fp16 = batch_norm(beta = input_15_beta_0_to_fp16, epsilon = input_15_epsilon_0_to_fp16, gamma = input_15_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_11_cast_fp16)[name = string("input_15_cast_fp16")];
+            string var_631_pad_type_0 = const()[name = string("op_631_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_631_strides_0 = const()[name = string("op_631_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_631_pad_0 = const()[name = string("op_631_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_631_dilations_0 = const()[name = string("op_631_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_631_groups_0 = const()[name = string("op_631_groups_0"), val = int32(1)];
+            tensor<fp16, [3072, 768, 1, 1]> layers_1_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [3072, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(88109440))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(89289152))))[name = string("layers_1_fc1_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [3072]> layers_1_fc1_inlier_module_bias_to_fp16 = const()[name = string("layers_1_fc1_inlier_module_bias_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(89289280)))];
+            tensor<fp16, [1, 3072, 1, 1]> var_631_cast_fp16 = conv(bias = layers_1_fc1_inlier_module_bias_to_fp16, dilations = var_631_dilations_0, groups = var_631_groups_0, pad = var_631_pad_0, pad_type = var_631_pad_type_0, strides = var_631_strides_0, weight = layers_1_fc1_inlier_module_weight_to_fp16_palettized, x = input_15_cast_fp16)[name = string("op_631_cast_fp16")];
+            string var_637_pad_type_0 = const()[name = string("op_637_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_637_strides_0 = const()[name = string("op_637_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_637_pad_0 = const()[name = string("op_637_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_637_dilations_0 = const()[name = string("op_637_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_637_groups_0 = const()[name = string("op_637_groups_0"), val = int32(1)];
+            tensor<fp16, [3072, 768, 1, 1]> layers_1_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [3072, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(89384960))), nonzero_data = tensor<fp16, [44701]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(89295488))))[name = string("layers_1_fc1_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 3072, 1, 1]> var_637_cast_fp16 = conv(dilations = var_637_dilations_0, groups = var_637_groups_0, pad = var_637_pad_0, pad_type = var_637_pad_type_0, strides = var_637_strides_0, weight = layers_1_fc1_outlier_module_weight_to_fp16_sparsified, x = input_15_cast_fp16)[name = string("op_637_cast_fp16")];
+            tensor<fp16, [1, 3072, 1, 1]> input_17_cast_fp16 = add(x = var_631_cast_fp16, y = var_637_cast_fp16)[name = string("input_17_cast_fp16")];
+            string input_19_mode_0 = const()[name = string("input_19_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 3072, 1, 1]> input_19_cast_fp16 = gelu(mode = input_19_mode_0, x = input_17_cast_fp16)[name = string("input_19_cast_fp16")];
+            string var_648_pad_type_0 = const()[name = string("op_648_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_648_strides_0 = const()[name = string("op_648_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_648_pad_0 = const()[name = string("op_648_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_648_dilations_0 = const()[name = string("op_648_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_648_groups_0 = const()[name = string("op_648_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 3072, 1, 1]> layers_1_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(89679936))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(90859648))))[name = string("layers_1_fc2_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_1_fc2_inlier_module_bias_to_fp16 = const()[name = string("layers_1_fc2_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(90859776)))];
+            tensor<fp16, [1, 768, 1, 1]> var_648_cast_fp16 = conv(bias = layers_1_fc2_inlier_module_bias_to_fp16, dilations = var_648_dilations_0, groups = var_648_groups_0, pad = var_648_pad_0, pad_type = var_648_pad_type_0, strides = var_648_strides_0, weight = layers_1_fc2_inlier_module_weight_to_fp16_palettized, x = input_19_cast_fp16)[name = string("op_648_cast_fp16")];
+            string var_654_pad_type_0 = const()[name = string("op_654_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_654_strides_0 = const()[name = string("op_654_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_654_pad_0 = const()[name = string("op_654_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_654_dilations_0 = const()[name = string("op_654_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_654_groups_0 = const()[name = string("op_654_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 3072, 1, 1]> layers_1_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(90936128))), nonzero_data = tensor<fp16, [37336]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(90861376))))[name = string("layers_1_fc2_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1]> var_654_cast_fp16 = conv(dilations = var_654_dilations_0, groups = var_654_groups_0, pad = var_654_pad_0, pad_type = var_654_pad_type_0, strides = var_654_strides_0, weight = layers_1_fc2_outlier_module_weight_to_fp16_sparsified, x = input_19_cast_fp16)[name = string("op_654_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> hidden_states_5_cast_fp16 = add(x = var_648_cast_fp16, y = var_654_cast_fp16)[name = string("hidden_states_5_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> inputs_13_cast_fp16 = add(x = inputs_11_cast_fp16, y = hidden_states_5_cast_fp16)[name = string("inputs_13_cast_fp16")];
+            tensor<int32, [4]> obj_53_begin_0 = const()[name = string("obj_53_begin_0"), val = tensor<int32, [4]>([2, 0, 0, 0])];
+            tensor<int32, [4]> obj_53_end_0 = const()[name = string("obj_53_end_0"), val = tensor<int32, [4]>([3, 768, 1, 1536])];
+            tensor<bool, [4]> obj_53_end_mask_0 = const()[name = string("obj_53_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<fp16, [1, 768, 1, 1536]> obj_53_cast_fp16 = slice_by_index(begin = obj_53_begin_0, end = obj_53_end_0, end_mask = obj_53_end_mask_0, x = read_state_2)[name = string("obj_53_cast_fp16")];
+            tensor<int32, [4]> obj_55_begin_0 = const()[name = string("obj_55_begin_0"), val = tensor<int32, [4]>([2, 0, 0, 0])];
+            tensor<int32, [4]> obj_55_end_0 = const()[name = string("obj_55_end_0"), val = tensor<int32, [4]>([3, 768, 1, 1536])];
+            tensor<bool, [4]> obj_55_end_mask_0 = const()[name = string("obj_55_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<fp16, [1, 768, 1, 1536]> obj_55_cast_fp16 = slice_by_index(begin = obj_55_begin_0, end = obj_55_end_0, end_mask = obj_55_end_mask_0, x = read_state_3)[name = string("obj_55_cast_fp16")];
+            int32 var_676 = const()[name = string("op_676"), val = int32(3)];
+            tensor<int32, [1]> out_13_axes_0 = const()[name = string("out_13_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_701_to_fp16 = const()[name = string("op_701_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> out_13_cast_fp16 = layer_norm(axes = out_13_axes_0, epsilon = var_701_to_fp16, x = inputs_13_cast_fp16)[name = string("out_13_cast_fp16")];
+            tensor<fp16, [768]> obj_43_gamma_0_to_fp16 = const()[name = string("obj_43_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(91231104)))];
+            tensor<fp16, [768]> obj_43_beta_0_to_fp16 = const()[name = string("obj_43_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(91232704)))];
+            fp16 obj_43_epsilon_0_to_fp16 = const()[name = string("obj_43_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> obj_43_cast_fp16 = batch_norm(beta = obj_43_beta_0_to_fp16, epsilon = obj_43_epsilon_0_to_fp16, gamma = obj_43_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_13_cast_fp16)[name = string("obj_43_cast_fp16")];
+            string var_723_pad_type_0 = const()[name = string("op_723_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_723_strides_0 = const()[name = string("op_723_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_723_pad_0 = const()[name = string("op_723_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_723_dilations_0 = const()[name = string("op_723_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_723_groups_0 = const()[name = string("op_723_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_2_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(91234304))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(91529280))))[name = string("layers_2_self_attn_q_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_2_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_2_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(91529408)))];
+            tensor<fp16, [1, 768, 1, 1]> var_723_cast_fp16 = conv(bias = layers_2_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_723_dilations_0, groups = var_723_groups_0, pad = var_723_pad_0, pad_type = var_723_pad_type_0, strides = var_723_strides_0, weight = layers_2_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_43_cast_fp16)[name = string("op_723_cast_fp16")];
+            string var_729_pad_type_0 = const()[name = string("op_729_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_729_strides_0 = const()[name = string("op_729_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_729_pad_0 = const()[name = string("op_729_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_729_dilations_0 = const()[name = string("op_729_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_729_groups_0 = const()[name = string("op_729_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_2_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(91561536))), nonzero_data = tensor<fp16, [15213]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(91531008))))[name = string("layers_2_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1]> var_729_cast_fp16 = conv(dilations = var_729_dilations_0, groups = var_729_groups_0, pad = var_729_pad_0, pad_type = var_729_pad_type_0, strides = var_729_strides_0, weight = layers_2_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_43_cast_fp16)[name = string("op_729_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> query_9_cast_fp16 = add(x = var_723_cast_fp16, y = var_729_cast_fp16)[name = string("query_9_cast_fp16")];
+            string var_738_pad_type_0 = const()[name = string("op_738_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_738_strides_0 = const()[name = string("op_738_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_738_pad_0 = const()[name = string("op_738_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_738_dilations_0 = const()[name = string("op_738_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_738_groups_0 = const()[name = string("op_738_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_2_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(91635328))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(91930304))))[name = string("layers_2_self_attn_k_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1, 768, 1, 1]> var_738_cast_fp16 = conv(dilations = var_738_dilations_0, groups = var_738_groups_0, pad = var_738_pad_0, pad_type = var_738_pad_type_0, strides = var_738_strides_0, weight = layers_2_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_43_cast_fp16)[name = string("op_738_cast_fp16")];
+            string var_744_pad_type_0 = const()[name = string("op_744_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_744_strides_0 = const()[name = string("op_744_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_744_pad_0 = const()[name = string("op_744_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_744_dilations_0 = const()[name = string("op_744_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_744_groups_0 = const()[name = string("op_744_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_2_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(91961984))), nonzero_data = tensor<fp16, [15714]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(91930432))))[name = string("layers_2_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1]> var_744_cast_fp16 = conv(dilations = var_744_dilations_0, groups = var_744_groups_0, pad = var_744_pad_0, pad_type = var_744_pad_type_0, strides = var_744_strides_0, weight = layers_2_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_43_cast_fp16)[name = string("op_744_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> current_key_5_cast_fp16 = add(x = var_738_cast_fp16, y = var_744_cast_fp16)[name = string("current_key_5_cast_fp16")];
+            string var_754_pad_type_0 = const()[name = string("op_754_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_754_strides_0 = const()[name = string("op_754_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_754_pad_0 = const()[name = string("op_754_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_754_dilations_0 = const()[name = string("op_754_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_754_groups_0 = const()[name = string("op_754_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_2_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(92035776))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(92330752))))[name = string("layers_2_self_attn_v_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_2_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_2_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(92330880)))];
+            tensor<fp16, [1, 768, 1, 1]> var_754_cast_fp16 = conv(bias = layers_2_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_754_dilations_0, groups = var_754_groups_0, pad = var_754_pad_0, pad_type = var_754_pad_type_0, strides = var_754_strides_0, weight = layers_2_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_43_cast_fp16)[name = string("op_754_cast_fp16")];
+            string var_760_pad_type_0 = const()[name = string("op_760_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_760_strides_0 = const()[name = string("op_760_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_760_pad_0 = const()[name = string("op_760_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_760_dilations_0 = const()[name = string("op_760_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_760_groups_0 = const()[name = string("op_760_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_2_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(92365504))), nonzero_data = tensor<fp16, [16479]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(92332480))))[name = string("layers_2_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1]> var_760_cast_fp16 = conv(dilations = var_760_dilations_0, groups = var_760_groups_0, pad = var_760_pad_0, pad_type = var_760_pad_type_0, strides = var_760_strides_0, weight = layers_2_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_43_cast_fp16)[name = string("op_760_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> current_value_5_cast_fp16 = add(x = var_754_cast_fp16, y = var_760_cast_fp16)[name = string("current_value_5_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 448]> var_766_cast_fp16 = mul(x = current_key_5_cast_fp16, y = var_202_cast_fp16)[name = string("op_766_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 448]> key_5_cast_fp16 = add(x = var_71_cast_fp16_2, y = var_766_cast_fp16)[name = string("key_5_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 448]> var_768_cast_fp16 = mul(x = current_value_5_cast_fp16, y = var_202_cast_fp16)[name = string("op_768_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 448]> value_5_cast_fp16 = add(x = var_86_cast_fp16_2, y = var_768_cast_fp16)[name = string("value_5_cast_fp16")];
+            tensor<int32, [4]> var_771 = const()[name = string("op_771"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1]> mh_q_9_cast_fp16 = reshape(shape = var_771, x = query_9_cast_fp16)[name = string("mh_q_9_cast_fp16")];
+            fp16 var_773_to_fp16 = const()[name = string("op_773_to_fp16"), val = fp16(0x1p-3)];
+            tensor<fp16, [1, 12, 64, 1]> var_774_cast_fp16 = mul(x = mh_q_9_cast_fp16, y = var_773_to_fp16)[name = string("op_774_cast_fp16")];
+            tensor<int32, [4]> var_775 = const()[name = string("op_775"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 448]> var_776_cast_fp16 = reshape(shape = var_775, x = key_5_cast_fp16)[name = string("op_776_cast_fp16")];
+            bool mh_w_17_transpose_x_0 = const()[name = string("mh_w_17_transpose_x_0"), val = bool(true)];
+            bool mh_w_17_transpose_y_0 = const()[name = string("mh_w_17_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 12, 1, 448]> mh_w_17_cast_fp16 = matmul(transpose_x = mh_w_17_transpose_x_0, transpose_y = mh_w_17_transpose_y_0, x = var_774_cast_fp16, y = var_776_cast_fp16)[name = string("mh_w_17_cast_fp16")];
+            tensor<fp16, [1, 12, 1, 448]> mh_w_19_cast_fp16 = add(x = mh_w_17_cast_fp16, y = var_219_cast_fp16)[name = string("mh_w_19_cast_fp16")];
+            tensor<fp16, [1, 12, 1, 448]> var_784_cast_fp16 = softmax(axis = var_676, x = mh_w_19_cast_fp16)[name = string("op_784_cast_fp16")];
+            tensor<int32, [4]> var_785 = const()[name = string("op_785"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 448]> var_786_cast_fp16 = reshape(shape = var_785, x = value_5_cast_fp16)[name = string("op_786_cast_fp16")];
+            bool attn_9_transpose_x_0 = const()[name = string("attn_9_transpose_x_0"), val = bool(false)];
+            bool attn_9_transpose_y_0 = const()[name = string("attn_9_transpose_y_0"), val = bool(true)];
+            tensor<fp16, [1, 12, 64, 1]> attn_9_cast_fp16 = matmul(transpose_x = attn_9_transpose_x_0, transpose_y = attn_9_transpose_y_0, x = var_786_cast_fp16, y = var_784_cast_fp16)[name = string("attn_9_cast_fp16")];
+            tensor<int32, [4]> var_789 = const()[name = string("op_789"), val = tensor<int32, [4]>([1, 768, 1, -1])];
+            tensor<fp16, [1, 768, 1, 1]> input_21_cast_fp16 = reshape(shape = var_789, x = attn_9_cast_fp16)[name = string("input_21_cast_fp16")];
+            string var_799_pad_type_0 = const()[name = string("op_799_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_799_strides_0 = const()[name = string("op_799_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_799_pad_0 = const()[name = string("op_799_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_799_dilations_0 = const()[name = string("op_799_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_799_groups_0 = const()[name = string("op_799_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_2_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(92439296))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(92734272))))[name = string("layers_2_self_attn_o_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_2_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_2_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(92734400)))];
+            tensor<fp16, [1, 768, 1, 1]> var_799_cast_fp16 = conv(bias = layers_2_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_799_dilations_0, groups = var_799_groups_0, pad = var_799_pad_0, pad_type = var_799_pad_type_0, strides = var_799_strides_0, weight = layers_2_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_21_cast_fp16)[name = string("op_799_cast_fp16")];
+            string var_805_pad_type_0 = const()[name = string("op_805_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_805_strides_0 = const()[name = string("op_805_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_805_pad_0 = const()[name = string("op_805_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_805_dilations_0 = const()[name = string("op_805_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_805_groups_0 = const()[name = string("op_805_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_2_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(92761920))), nonzero_data = tensor<fp16, [12911]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(92736000))))[name = string("layers_2_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1]> var_805_cast_fp16 = conv(dilations = var_805_dilations_0, groups = var_805_groups_0, pad = var_805_pad_0, pad_type = var_805_pad_type_0, strides = var_805_strides_0, weight = layers_2_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_21_cast_fp16)[name = string("op_805_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> obj_49_cast_fp16 = add(x = var_799_cast_fp16, y = var_805_cast_fp16)[name = string("obj_49_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> inputs_15_cast_fp16 = add(x = inputs_13_cast_fp16, y = obj_49_cast_fp16)[name = string("inputs_15_cast_fp16")];
+            tensor<int32, [1]> out_15_axes_0 = const()[name = string("out_15_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_820_to_fp16 = const()[name = string("op_820_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> out_15_cast_fp16 = layer_norm(axes = out_15_axes_0, epsilon = var_820_to_fp16, x = inputs_15_cast_fp16)[name = string("out_15_cast_fp16")];
+            tensor<fp16, [768]> obj_51_gamma_0_to_fp16 = const()[name = string("obj_51_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(92835712)))];
+            tensor<fp16, [768]> obj_51_beta_0_to_fp16 = const()[name = string("obj_51_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(92837312)))];
+            fp16 obj_51_epsilon_0_to_fp16 = const()[name = string("obj_51_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> obj_51_cast_fp16 = batch_norm(beta = obj_51_beta_0_to_fp16, epsilon = obj_51_epsilon_0_to_fp16, gamma = obj_51_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_15_cast_fp16)[name = string("obj_51_cast_fp16")];
+            string var_840_pad_type_0 = const()[name = string("op_840_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_840_strides_0 = const()[name = string("op_840_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_840_pad_0 = const()[name = string("op_840_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_840_dilations_0 = const()[name = string("op_840_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_840_groups_0 = const()[name = string("op_840_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_2_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(92838912))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(93133888))))[name = string("layers_2_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_2_encoder_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_2_encoder_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(93134016)))];
+            tensor<fp16, [1, 768, 1, 1]> var_840_cast_fp16 = conv(bias = layers_2_encoder_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_840_dilations_0, groups = var_840_groups_0, pad = var_840_pad_0, pad_type = var_840_pad_type_0, strides = var_840_strides_0, weight = layers_2_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_51_cast_fp16)[name = string("op_840_cast_fp16")];
+            string var_846_pad_type_0 = const()[name = string("op_846_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_846_strides_0 = const()[name = string("op_846_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_846_pad_0 = const()[name = string("op_846_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_846_dilations_0 = const()[name = string("op_846_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_846_groups_0 = const()[name = string("op_846_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_2_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(93146368))), nonzero_data = tensor<fp16, [5336]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(93135616))))[name = string("layers_2_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1]> var_846_cast_fp16 = conv(dilations = var_846_dilations_0, groups = var_846_groups_0, pad = var_846_pad_0, pad_type = var_846_pad_type_0, strides = var_846_strides_0, weight = layers_2_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_51_cast_fp16)[name = string("op_846_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> query_11_cast_fp16 = add(x = var_840_cast_fp16, y = var_846_cast_fp16)[name = string("query_11_cast_fp16")];
+            tensor<int32, [4]> var_849 = const()[name = string("op_849"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1]> mh_q_11_cast_fp16 = reshape(shape = var_849, x = query_11_cast_fp16)[name = string("mh_q_11_cast_fp16")];
+            fp16 var_851_to_fp16 = const()[name = string("op_851_to_fp16"), val = fp16(0x1p-3)];
+            tensor<fp16, [1, 12, 64, 1]> var_852_cast_fp16 = mul(x = mh_q_11_cast_fp16, y = var_851_to_fp16)[name = string("op_852_cast_fp16")];
+            tensor<int32, [4]> var_853 = const()[name = string("op_853"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1536]> var_854_cast_fp16 = reshape(shape = var_853, x = obj_53_cast_fp16)[name = string("op_854_cast_fp16")];
+            bool mh_w_21_transpose_x_0 = const()[name = string("mh_w_21_transpose_x_0"), val = bool(true)];
+            bool mh_w_21_transpose_y_0 = const()[name = string("mh_w_21_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 12, 1, 1536]> mh_w_21_cast_fp16 = matmul(transpose_x = mh_w_21_transpose_x_0, transpose_y = mh_w_21_transpose_y_0, x = var_852_cast_fp16, y = var_854_cast_fp16)[name = string("mh_w_21_cast_fp16")];
+            tensor<fp16, [1, 12, 1, 1536]> mh_w_23_cast_fp16 = add(x = mh_w_21_cast_fp16, y = var_297_cast_fp16)[name = string("mh_w_23_cast_fp16")];
+            tensor<fp16, [1, 12, 1, 1536]> obj_59_cast_fp16 = softmax(axis = var_676, x = mh_w_23_cast_fp16)[name = string("obj_59_cast_fp16")];
+            tensor<int32, [4]> var_863 = const()[name = string("op_863"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1536]> var_864_cast_fp16 = reshape(shape = var_863, x = obj_55_cast_fp16)[name = string("op_864_cast_fp16")];
+            bool attn_11_transpose_x_0 = const()[name = string("attn_11_transpose_x_0"), val = bool(false)];
+            bool attn_11_transpose_y_0 = const()[name = string("attn_11_transpose_y_0"), val = bool(true)];
+            tensor<fp16, [1, 12, 64, 1]> attn_11_cast_fp16 = matmul(transpose_x = attn_11_transpose_x_0, transpose_y = attn_11_transpose_y_0, x = var_864_cast_fp16, y = obj_59_cast_fp16)[name = string("attn_11_cast_fp16")];
+            tensor<int32, [4]> var_867 = const()[name = string("op_867"), val = tensor<int32, [4]>([1, 768, 1, -1])];
+            tensor<fp16, [1, 768, 1, 1]> input_23_cast_fp16 = reshape(shape = var_867, x = attn_11_cast_fp16)[name = string("input_23_cast_fp16")];
+            string var_877_pad_type_0 = const()[name = string("op_877_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_877_strides_0 = const()[name = string("op_877_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_877_pad_0 = const()[name = string("op_877_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_877_dilations_0 = const()[name = string("op_877_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_877_groups_0 = const()[name = string("op_877_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_2_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(93220160))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(93515136))))[name = string("layers_2_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_2_encoder_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_2_encoder_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(93515264)))];
+            tensor<fp16, [1, 768, 1, 1]> var_877_cast_fp16 = conv(bias = layers_2_encoder_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_877_dilations_0, groups = var_877_groups_0, pad = var_877_pad_0, pad_type = var_877_pad_type_0, strides = var_877_strides_0, weight = layers_2_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_23_cast_fp16)[name = string("op_877_cast_fp16")];
+            string var_883_pad_type_0 = const()[name = string("op_883_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_883_strides_0 = const()[name = string("op_883_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_883_pad_0 = const()[name = string("op_883_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_883_dilations_0 = const()[name = string("op_883_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_883_groups_0 = const()[name = string("op_883_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_2_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(93526336))), nonzero_data = tensor<fp16, [4676]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(93516864))))[name = string("layers_2_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1]> var_883_cast_fp16 = conv(dilations = var_883_dilations_0, groups = var_883_groups_0, pad = var_883_pad_0, pad_type = var_883_pad_type_0, strides = var_883_strides_0, weight = layers_2_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_23_cast_fp16)[name = string("op_883_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> obj_57_cast_fp16 = add(x = var_877_cast_fp16, y = var_883_cast_fp16)[name = string("obj_57_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> inputs_17_cast_fp16 = add(x = inputs_15_cast_fp16, y = obj_57_cast_fp16)[name = string("inputs_17_cast_fp16")];
+            tensor<int32, [1]> out_17_axes_0 = const()[name = string("out_17_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_894_to_fp16 = const()[name = string("op_894_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> out_17_cast_fp16 = layer_norm(axes = out_17_axes_0, epsilon = var_894_to_fp16, x = inputs_17_cast_fp16)[name = string("out_17_cast_fp16")];
+            tensor<fp16, [768]> input_25_gamma_0_to_fp16 = const()[name = string("input_25_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(93600128)))];
+            tensor<fp16, [768]> input_25_beta_0_to_fp16 = const()[name = string("input_25_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(93601728)))];
+            fp16 input_25_epsilon_0_to_fp16 = const()[name = string("input_25_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> input_25_cast_fp16 = batch_norm(beta = input_25_beta_0_to_fp16, epsilon = input_25_epsilon_0_to_fp16, gamma = input_25_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_17_cast_fp16)[name = string("input_25_cast_fp16")];
+            string var_912_pad_type_0 = const()[name = string("op_912_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_912_strides_0 = const()[name = string("op_912_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_912_pad_0 = const()[name = string("op_912_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_912_dilations_0 = const()[name = string("op_912_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_912_groups_0 = const()[name = string("op_912_groups_0"), val = int32(1)];
+            tensor<fp16, [3072, 768, 1, 1]> layers_2_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [3072, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(93603328))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(94783040))))[name = string("layers_2_fc1_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [3072]> layers_2_fc1_inlier_module_bias_to_fp16 = const()[name = string("layers_2_fc1_inlier_module_bias_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(94783168)))];
+            tensor<fp16, [1, 3072, 1, 1]> var_912_cast_fp16 = conv(bias = layers_2_fc1_inlier_module_bias_to_fp16, dilations = var_912_dilations_0, groups = var_912_groups_0, pad = var_912_pad_0, pad_type = var_912_pad_type_0, strides = var_912_strides_0, weight = layers_2_fc1_inlier_module_weight_to_fp16_palettized, x = input_25_cast_fp16)[name = string("op_912_cast_fp16")];
+            string var_918_pad_type_0 = const()[name = string("op_918_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_918_strides_0 = const()[name = string("op_918_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_918_pad_0 = const()[name = string("op_918_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_918_dilations_0 = const()[name = string("op_918_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_918_groups_0 = const()[name = string("op_918_groups_0"), val = int32(1)];
+            tensor<fp16, [3072, 768, 1, 1]> layers_2_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [3072, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(94895552))), nonzero_data = tensor<fp16, [53042]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(94789376))))[name = string("layers_2_fc1_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 3072, 1, 1]> var_918_cast_fp16 = conv(dilations = var_918_dilations_0, groups = var_918_groups_0, pad = var_918_pad_0, pad_type = var_918_pad_type_0, strides = var_918_strides_0, weight = layers_2_fc1_outlier_module_weight_to_fp16_sparsified, x = input_25_cast_fp16)[name = string("op_918_cast_fp16")];
+            tensor<fp16, [1, 3072, 1, 1]> input_27_cast_fp16 = add(x = var_912_cast_fp16, y = var_918_cast_fp16)[name = string("input_27_cast_fp16")];
+            string input_29_mode_0 = const()[name = string("input_29_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 3072, 1, 1]> input_29_cast_fp16 = gelu(mode = input_29_mode_0, x = input_27_cast_fp16)[name = string("input_29_cast_fp16")];
+            string var_929_pad_type_0 = const()[name = string("op_929_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_929_strides_0 = const()[name = string("op_929_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_929_pad_0 = const()[name = string("op_929_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_929_dilations_0 = const()[name = string("op_929_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_929_groups_0 = const()[name = string("op_929_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 3072, 1, 1]> layers_2_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(95190528))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(96370240))))[name = string("layers_2_fc2_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_2_fc2_inlier_module_bias_to_fp16 = const()[name = string("layers_2_fc2_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(96370368)))];
+            tensor<fp16, [1, 768, 1, 1]> var_929_cast_fp16 = conv(bias = layers_2_fc2_inlier_module_bias_to_fp16, dilations = var_929_dilations_0, groups = var_929_groups_0, pad = var_929_pad_0, pad_type = var_929_pad_type_0, strides = var_929_strides_0, weight = layers_2_fc2_inlier_module_weight_to_fp16_palettized, x = input_29_cast_fp16)[name = string("op_929_cast_fp16")];
+            string var_935_pad_type_0 = const()[name = string("op_935_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_935_strides_0 = const()[name = string("op_935_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_935_pad_0 = const()[name = string("op_935_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_935_dilations_0 = const()[name = string("op_935_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_935_groups_0 = const()[name = string("op_935_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 3072, 1, 1]> layers_2_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(96483392))), nonzero_data = tensor<fp16, [55671]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(96371968))))[name = string("layers_2_fc2_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1]> var_935_cast_fp16 = conv(dilations = var_935_dilations_0, groups = var_935_groups_0, pad = var_935_pad_0, pad_type = var_935_pad_type_0, strides = var_935_strides_0, weight = layers_2_fc2_outlier_module_weight_to_fp16_sparsified, x = input_29_cast_fp16)[name = string("op_935_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> hidden_states_7_cast_fp16 = add(x = var_929_cast_fp16, y = var_935_cast_fp16)[name = string("hidden_states_7_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> inputs_19_cast_fp16 = add(x = inputs_17_cast_fp16, y = hidden_states_7_cast_fp16)[name = string("inputs_19_cast_fp16")];
+            tensor<int32, [4]> obj_71_begin_0 = const()[name = string("obj_71_begin_0"), val = tensor<int32, [4]>([3, 0, 0, 0])];
+            tensor<int32, [4]> obj_71_end_0 = const()[name = string("obj_71_end_0"), val = tensor<int32, [4]>([4, 768, 1, 1536])];
+            tensor<bool, [4]> obj_71_end_mask_0 = const()[name = string("obj_71_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<fp16, [1, 768, 1, 1536]> obj_71_cast_fp16 = slice_by_index(begin = obj_71_begin_0, end = obj_71_end_0, end_mask = obj_71_end_mask_0, x = read_state_2)[name = string("obj_71_cast_fp16")];
+            tensor<int32, [4]> obj_73_begin_0 = const()[name = string("obj_73_begin_0"), val = tensor<int32, [4]>([3, 0, 0, 0])];
+            tensor<int32, [4]> obj_73_end_0 = const()[name = string("obj_73_end_0"), val = tensor<int32, [4]>([4, 768, 1, 1536])];
+            tensor<bool, [4]> obj_73_end_mask_0 = const()[name = string("obj_73_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<fp16, [1, 768, 1, 1536]> obj_73_cast_fp16 = slice_by_index(begin = obj_73_begin_0, end = obj_73_end_0, end_mask = obj_73_end_mask_0, x = read_state_3)[name = string("obj_73_cast_fp16")];
+            int32 var_957 = const()[name = string("op_957"), val = int32(3)];
+            tensor<int32, [1]> out_19_axes_0 = const()[name = string("out_19_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_982_to_fp16 = const()[name = string("op_982_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> out_19_cast_fp16 = layer_norm(axes = out_19_axes_0, epsilon = var_982_to_fp16, x = inputs_19_cast_fp16)[name = string("out_19_cast_fp16")];
+            tensor<fp16, [768]> obj_61_gamma_0_to_fp16 = const()[name = string("obj_61_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(96778368)))];
+            tensor<fp16, [768]> obj_61_beta_0_to_fp16 = const()[name = string("obj_61_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(96779968)))];
+            fp16 obj_61_epsilon_0_to_fp16 = const()[name = string("obj_61_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> obj_61_cast_fp16 = batch_norm(beta = obj_61_beta_0_to_fp16, epsilon = obj_61_epsilon_0_to_fp16, gamma = obj_61_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_19_cast_fp16)[name = string("obj_61_cast_fp16")];
+            string var_1004_pad_type_0 = const()[name = string("op_1004_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1004_strides_0 = const()[name = string("op_1004_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1004_pad_0 = const()[name = string("op_1004_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1004_dilations_0 = const()[name = string("op_1004_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1004_groups_0 = const()[name = string("op_1004_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_3_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(96781568))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(97076544))))[name = string("layers_3_self_attn_q_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_3_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_3_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(97076672)))];
+            tensor<fp16, [1, 768, 1, 1]> var_1004_cast_fp16 = conv(bias = layers_3_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_1004_dilations_0, groups = var_1004_groups_0, pad = var_1004_pad_0, pad_type = var_1004_pad_type_0, strides = var_1004_strides_0, weight = layers_3_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_61_cast_fp16)[name = string("op_1004_cast_fp16")];
+            string var_1010_pad_type_0 = const()[name = string("op_1010_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1010_strides_0 = const()[name = string("op_1010_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1010_pad_0 = const()[name = string("op_1010_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1010_dilations_0 = const()[name = string("op_1010_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1010_groups_0 = const()[name = string("op_1010_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_3_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(97097024))), nonzero_data = tensor<fp16, [9316]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(97078272))))[name = string("layers_3_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1]> var_1010_cast_fp16 = conv(dilations = var_1010_dilations_0, groups = var_1010_groups_0, pad = var_1010_pad_0, pad_type = var_1010_pad_type_0, strides = var_1010_strides_0, weight = layers_3_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_61_cast_fp16)[name = string("op_1010_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> query_13_cast_fp16 = add(x = var_1004_cast_fp16, y = var_1010_cast_fp16)[name = string("query_13_cast_fp16")];
+            string var_1019_pad_type_0 = const()[name = string("op_1019_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1019_strides_0 = const()[name = string("op_1019_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1019_pad_0 = const()[name = string("op_1019_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1019_dilations_0 = const()[name = string("op_1019_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1019_groups_0 = const()[name = string("op_1019_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_3_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(97170816))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(97465792))))[name = string("layers_3_self_attn_k_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1, 768, 1, 1]> var_1019_cast_fp16 = conv(dilations = var_1019_dilations_0, groups = var_1019_groups_0, pad = var_1019_pad_0, pad_type = var_1019_pad_type_0, strides = var_1019_strides_0, weight = layers_3_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_61_cast_fp16)[name = string("op_1019_cast_fp16")];
+            string var_1025_pad_type_0 = const()[name = string("op_1025_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1025_strides_0 = const()[name = string("op_1025_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1025_pad_0 = const()[name = string("op_1025_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1025_dilations_0 = const()[name = string("op_1025_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1025_groups_0 = const()[name = string("op_1025_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_3_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(97482880))), nonzero_data = tensor<fp16, [8433]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(97465920))))[name = string("layers_3_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1]> var_1025_cast_fp16 = conv(dilations = var_1025_dilations_0, groups = var_1025_groups_0, pad = var_1025_pad_0, pad_type = var_1025_pad_type_0, strides = var_1025_strides_0, weight = layers_3_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_61_cast_fp16)[name = string("op_1025_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> current_key_7_cast_fp16 = add(x = var_1019_cast_fp16, y = var_1025_cast_fp16)[name = string("current_key_7_cast_fp16")];
+            string var_1035_pad_type_0 = const()[name = string("op_1035_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1035_strides_0 = const()[name = string("op_1035_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1035_pad_0 = const()[name = string("op_1035_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1035_dilations_0 = const()[name = string("op_1035_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1035_groups_0 = const()[name = string("op_1035_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_3_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(97556672))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(97851648))))[name = string("layers_3_self_attn_v_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_3_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_3_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(97851776)))];
+            tensor<fp16, [1, 768, 1, 1]> var_1035_cast_fp16 = conv(bias = layers_3_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_1035_dilations_0, groups = var_1035_groups_0, pad = var_1035_pad_0, pad_type = var_1035_pad_type_0, strides = var_1035_strides_0, weight = layers_3_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_61_cast_fp16)[name = string("op_1035_cast_fp16")];
+            string var_1041_pad_type_0 = const()[name = string("op_1041_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1041_strides_0 = const()[name = string("op_1041_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1041_pad_0 = const()[name = string("op_1041_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1041_dilations_0 = const()[name = string("op_1041_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1041_groups_0 = const()[name = string("op_1041_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_3_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(97869504))), nonzero_data = tensor<fp16, [8010]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(97853376))))[name = string("layers_3_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1]> var_1041_cast_fp16 = conv(dilations = var_1041_dilations_0, groups = var_1041_groups_0, pad = var_1041_pad_0, pad_type = var_1041_pad_type_0, strides = var_1041_strides_0, weight = layers_3_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_61_cast_fp16)[name = string("op_1041_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> current_value_7_cast_fp16 = add(x = var_1035_cast_fp16, y = var_1041_cast_fp16)[name = string("current_value_7_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 448]> var_1047_cast_fp16 = mul(x = current_key_7_cast_fp16, y = var_202_cast_fp16)[name = string("op_1047_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 448]> key_7_cast_fp16 = add(x = var_71_cast_fp16_3, y = var_1047_cast_fp16)[name = string("key_7_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 448]> var_1049_cast_fp16 = mul(x = current_value_7_cast_fp16, y = var_202_cast_fp16)[name = string("op_1049_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 448]> value_7_cast_fp16 = add(x = var_86_cast_fp16_3, y = var_1049_cast_fp16)[name = string("value_7_cast_fp16")];
+            tensor<int32, [4]> var_1052 = const()[name = string("op_1052"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1]> mh_q_13_cast_fp16 = reshape(shape = var_1052, x = query_13_cast_fp16)[name = string("mh_q_13_cast_fp16")];
+            fp16 var_1054_to_fp16 = const()[name = string("op_1054_to_fp16"), val = fp16(0x1p-3)];
+            tensor<fp16, [1, 12, 64, 1]> var_1055_cast_fp16 = mul(x = mh_q_13_cast_fp16, y = var_1054_to_fp16)[name = string("op_1055_cast_fp16")];
+            tensor<int32, [4]> var_1056 = const()[name = string("op_1056"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 448]> var_1057_cast_fp16 = reshape(shape = var_1056, x = key_7_cast_fp16)[name = string("op_1057_cast_fp16")];
+            bool mh_w_25_transpose_x_0 = const()[name = string("mh_w_25_transpose_x_0"), val = bool(true)];
+            bool mh_w_25_transpose_y_0 = const()[name = string("mh_w_25_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 12, 1, 448]> mh_w_25_cast_fp16 = matmul(transpose_x = mh_w_25_transpose_x_0, transpose_y = mh_w_25_transpose_y_0, x = var_1055_cast_fp16, y = var_1057_cast_fp16)[name = string("mh_w_25_cast_fp16")];
+            tensor<fp16, [1, 12, 1, 448]> mh_w_27_cast_fp16 = add(x = mh_w_25_cast_fp16, y = var_219_cast_fp16)[name = string("mh_w_27_cast_fp16")];
+            tensor<fp16, [1, 12, 1, 448]> var_1065_cast_fp16 = softmax(axis = var_957, x = mh_w_27_cast_fp16)[name = string("op_1065_cast_fp16")];
+            tensor<int32, [4]> var_1066 = const()[name = string("op_1066"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 448]> var_1067_cast_fp16 = reshape(shape = var_1066, x = value_7_cast_fp16)[name = string("op_1067_cast_fp16")];
+            bool attn_13_transpose_x_0 = const()[name = string("attn_13_transpose_x_0"), val = bool(false)];
+            bool attn_13_transpose_y_0 = const()[name = string("attn_13_transpose_y_0"), val = bool(true)];
+            tensor<fp16, [1, 12, 64, 1]> attn_13_cast_fp16 = matmul(transpose_x = attn_13_transpose_x_0, transpose_y = attn_13_transpose_y_0, x = var_1067_cast_fp16, y = var_1065_cast_fp16)[name = string("attn_13_cast_fp16")];
+            tensor<int32, [4]> var_1070 = const()[name = string("op_1070"), val = tensor<int32, [4]>([1, 768, 1, -1])];
+            tensor<fp16, [1, 768, 1, 1]> input_31_cast_fp16 = reshape(shape = var_1070, x = attn_13_cast_fp16)[name = string("input_31_cast_fp16")];
+            string var_1080_pad_type_0 = const()[name = string("op_1080_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1080_strides_0 = const()[name = string("op_1080_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1080_pad_0 = const()[name = string("op_1080_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1080_dilations_0 = const()[name = string("op_1080_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1080_groups_0 = const()[name = string("op_1080_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_3_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(97943296))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(98238272))))[name = string("layers_3_self_attn_o_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_3_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_3_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(98238400)))];
+            tensor<fp16, [1, 768, 1, 1]> var_1080_cast_fp16 = conv(bias = layers_3_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_1080_dilations_0, groups = var_1080_groups_0, pad = var_1080_pad_0, pad_type = var_1080_pad_type_0, strides = var_1080_strides_0, weight = layers_3_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_31_cast_fp16)[name = string("op_1080_cast_fp16")];
+            string var_1086_pad_type_0 = const()[name = string("op_1086_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1086_strides_0 = const()[name = string("op_1086_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1086_pad_0 = const()[name = string("op_1086_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1086_dilations_0 = const()[name = string("op_1086_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1086_groups_0 = const()[name = string("op_1086_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_3_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(98260224))), nonzero_data = tensor<fp16, [10051]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(98240000))))[name = string("layers_3_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1]> var_1086_cast_fp16 = conv(dilations = var_1086_dilations_0, groups = var_1086_groups_0, pad = var_1086_pad_0, pad_type = var_1086_pad_type_0, strides = var_1086_strides_0, weight = layers_3_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_31_cast_fp16)[name = string("op_1086_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> obj_67_cast_fp16 = add(x = var_1080_cast_fp16, y = var_1086_cast_fp16)[name = string("obj_67_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> inputs_21_cast_fp16 = add(x = inputs_19_cast_fp16, y = obj_67_cast_fp16)[name = string("inputs_21_cast_fp16")];
+            tensor<int32, [1]> out_21_axes_0 = const()[name = string("out_21_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_1101_to_fp16 = const()[name = string("op_1101_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> out_21_cast_fp16 = layer_norm(axes = out_21_axes_0, epsilon = var_1101_to_fp16, x = inputs_21_cast_fp16)[name = string("out_21_cast_fp16")];
+            tensor<fp16, [768]> obj_69_gamma_0_to_fp16 = const()[name = string("obj_69_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(98334016)))];
+            tensor<fp16, [768]> obj_69_beta_0_to_fp16 = const()[name = string("obj_69_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(98335616)))];
+            fp16 obj_69_epsilon_0_to_fp16 = const()[name = string("obj_69_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> obj_69_cast_fp16 = batch_norm(beta = obj_69_beta_0_to_fp16, epsilon = obj_69_epsilon_0_to_fp16, gamma = obj_69_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_21_cast_fp16)[name = string("obj_69_cast_fp16")];
+            string var_1121_pad_type_0 = const()[name = string("op_1121_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1121_strides_0 = const()[name = string("op_1121_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1121_pad_0 = const()[name = string("op_1121_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1121_dilations_0 = const()[name = string("op_1121_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1121_groups_0 = const()[name = string("op_1121_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_3_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(98337216))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(98632192))))[name = string("layers_3_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_3_encoder_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_3_encoder_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(98632320)))];
+            tensor<fp16, [1, 768, 1, 1]> var_1121_cast_fp16 = conv(bias = layers_3_encoder_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_1121_dilations_0, groups = var_1121_groups_0, pad = var_1121_pad_0, pad_type = var_1121_pad_type_0, strides = var_1121_strides_0, weight = layers_3_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_69_cast_fp16)[name = string("op_1121_cast_fp16")];
+            string var_1127_pad_type_0 = const()[name = string("op_1127_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1127_strides_0 = const()[name = string("op_1127_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1127_pad_0 = const()[name = string("op_1127_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1127_dilations_0 = const()[name = string("op_1127_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1127_groups_0 = const()[name = string("op_1127_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_3_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(98650816))), nonzero_data = tensor<fp16, [8399]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(98633920))))[name = string("layers_3_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1]> var_1127_cast_fp16 = conv(dilations = var_1127_dilations_0, groups = var_1127_groups_0, pad = var_1127_pad_0, pad_type = var_1127_pad_type_0, strides = var_1127_strides_0, weight = layers_3_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_69_cast_fp16)[name = string("op_1127_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> query_15_cast_fp16 = add(x = var_1121_cast_fp16, y = var_1127_cast_fp16)[name = string("query_15_cast_fp16")];
+            tensor<int32, [4]> var_1130 = const()[name = string("op_1130"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1]> mh_q_15_cast_fp16 = reshape(shape = var_1130, x = query_15_cast_fp16)[name = string("mh_q_15_cast_fp16")];
+            fp16 var_1132_to_fp16 = const()[name = string("op_1132_to_fp16"), val = fp16(0x1p-3)];
+            tensor<fp16, [1, 12, 64, 1]> var_1133_cast_fp16 = mul(x = mh_q_15_cast_fp16, y = var_1132_to_fp16)[name = string("op_1133_cast_fp16")];
+            tensor<int32, [4]> var_1134 = const()[name = string("op_1134"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1536]> var_1135_cast_fp16 = reshape(shape = var_1134, x = obj_71_cast_fp16)[name = string("op_1135_cast_fp16")];
+            bool mh_w_29_transpose_x_0 = const()[name = string("mh_w_29_transpose_x_0"), val = bool(true)];
+            bool mh_w_29_transpose_y_0 = const()[name = string("mh_w_29_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 12, 1, 1536]> mh_w_29_cast_fp16 = matmul(transpose_x = mh_w_29_transpose_x_0, transpose_y = mh_w_29_transpose_y_0, x = var_1133_cast_fp16, y = var_1135_cast_fp16)[name = string("mh_w_29_cast_fp16")];
+            tensor<fp16, [1, 12, 1, 1536]> mh_w_31_cast_fp16 = add(x = mh_w_29_cast_fp16, y = var_297_cast_fp16)[name = string("mh_w_31_cast_fp16")];
+            tensor<fp16, [1, 12, 1, 1536]> obj_77_cast_fp16 = softmax(axis = var_957, x = mh_w_31_cast_fp16)[name = string("obj_77_cast_fp16")];
+            tensor<int32, [4]> var_1144 = const()[name = string("op_1144"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1536]> var_1145_cast_fp16 = reshape(shape = var_1144, x = obj_73_cast_fp16)[name = string("op_1145_cast_fp16")];
+            bool attn_15_transpose_x_0 = const()[name = string("attn_15_transpose_x_0"), val = bool(false)];
+            bool attn_15_transpose_y_0 = const()[name = string("attn_15_transpose_y_0"), val = bool(true)];
+            tensor<fp16, [1, 12, 64, 1]> attn_15_cast_fp16 = matmul(transpose_x = attn_15_transpose_x_0, transpose_y = attn_15_transpose_y_0, x = var_1145_cast_fp16, y = obj_77_cast_fp16)[name = string("attn_15_cast_fp16")];
+            tensor<int32, [4]> var_1148 = const()[name = string("op_1148"), val = tensor<int32, [4]>([1, 768, 1, -1])];
+            tensor<fp16, [1, 768, 1, 1]> input_33_cast_fp16 = reshape(shape = var_1148, x = attn_15_cast_fp16)[name = string("input_33_cast_fp16")];
+            string var_1158_pad_type_0 = const()[name = string("op_1158_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1158_strides_0 = const()[name = string("op_1158_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1158_pad_0 = const()[name = string("op_1158_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1158_dilations_0 = const()[name = string("op_1158_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1158_groups_0 = const()[name = string("op_1158_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_3_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(98724608))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(99019584))))[name = string("layers_3_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_3_encoder_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_3_encoder_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(99019712)))];
+            tensor<fp16, [1, 768, 1, 1]> var_1158_cast_fp16 = conv(bias = layers_3_encoder_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_1158_dilations_0, groups = var_1158_groups_0, pad = var_1158_pad_0, pad_type = var_1158_pad_type_0, strides = var_1158_strides_0, weight = layers_3_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_33_cast_fp16)[name = string("op_1158_cast_fp16")];
+            string var_1164_pad_type_0 = const()[name = string("op_1164_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1164_strides_0 = const()[name = string("op_1164_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1164_pad_0 = const()[name = string("op_1164_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1164_dilations_0 = const()[name = string("op_1164_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1164_groups_0 = const()[name = string("op_1164_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_3_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(99034624))), nonzero_data = tensor<fp16, [6602]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(99021312))))[name = string("layers_3_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1]> var_1164_cast_fp16 = conv(dilations = var_1164_dilations_0, groups = var_1164_groups_0, pad = var_1164_pad_0, pad_type = var_1164_pad_type_0, strides = var_1164_strides_0, weight = layers_3_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_33_cast_fp16)[name = string("op_1164_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> obj_75_cast_fp16 = add(x = var_1158_cast_fp16, y = var_1164_cast_fp16)[name = string("obj_75_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> inputs_23_cast_fp16 = add(x = inputs_21_cast_fp16, y = obj_75_cast_fp16)[name = string("inputs_23_cast_fp16")];
+            tensor<int32, [1]> out_23_axes_0 = const()[name = string("out_23_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_1175_to_fp16 = const()[name = string("op_1175_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> out_23_cast_fp16 = layer_norm(axes = out_23_axes_0, epsilon = var_1175_to_fp16, x = inputs_23_cast_fp16)[name = string("out_23_cast_fp16")];
+            tensor<fp16, [768]> input_35_gamma_0_to_fp16 = const()[name = string("input_35_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(99108416)))];
+            tensor<fp16, [768]> input_35_beta_0_to_fp16 = const()[name = string("input_35_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(99110016)))];
+            fp16 input_35_epsilon_0_to_fp16 = const()[name = string("input_35_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> input_35_cast_fp16 = batch_norm(beta = input_35_beta_0_to_fp16, epsilon = input_35_epsilon_0_to_fp16, gamma = input_35_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_23_cast_fp16)[name = string("input_35_cast_fp16")];
+            string var_1193_pad_type_0 = const()[name = string("op_1193_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1193_strides_0 = const()[name = string("op_1193_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1193_pad_0 = const()[name = string("op_1193_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1193_dilations_0 = const()[name = string("op_1193_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1193_groups_0 = const()[name = string("op_1193_groups_0"), val = int32(1)];
+            tensor<fp16, [3072, 768, 1, 1]> layers_3_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [3072, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(99111616))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(100291328))))[name = string("layers_3_fc1_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [3072]> layers_3_fc1_inlier_module_bias_to_fp16 = const()[name = string("layers_3_fc1_inlier_module_bias_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(100291456)))];
+            tensor<fp16, [1, 3072, 1, 1]> var_1193_cast_fp16 = conv(bias = layers_3_fc1_inlier_module_bias_to_fp16, dilations = var_1193_dilations_0, groups = var_1193_groups_0, pad = var_1193_pad_0, pad_type = var_1193_pad_type_0, strides = var_1193_strides_0, weight = layers_3_fc1_inlier_module_weight_to_fp16_palettized, x = input_35_cast_fp16)[name = string("op_1193_cast_fp16")];
+            string var_1199_pad_type_0 = const()[name = string("op_1199_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1199_strides_0 = const()[name = string("op_1199_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1199_pad_0 = const()[name = string("op_1199_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1199_dilations_0 = const()[name = string("op_1199_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1199_groups_0 = const()[name = string("op_1199_groups_0"), val = int32(1)];
+            tensor<fp16, [3072, 768, 1, 1]> layers_3_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [3072, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(100362496))), nonzero_data = tensor<fp16, [32379]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(100297664))))[name = string("layers_3_fc1_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 3072, 1, 1]> var_1199_cast_fp16 = conv(dilations = var_1199_dilations_0, groups = var_1199_groups_0, pad = var_1199_pad_0, pad_type = var_1199_pad_type_0, strides = var_1199_strides_0, weight = layers_3_fc1_outlier_module_weight_to_fp16_sparsified, x = input_35_cast_fp16)[name = string("op_1199_cast_fp16")];
+            tensor<fp16, [1, 3072, 1, 1]> input_37_cast_fp16 = add(x = var_1193_cast_fp16, y = var_1199_cast_fp16)[name = string("input_37_cast_fp16")];
+            string input_39_mode_0 = const()[name = string("input_39_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 3072, 1, 1]> input_39_cast_fp16 = gelu(mode = input_39_mode_0, x = input_37_cast_fp16)[name = string("input_39_cast_fp16")];
+            string var_1210_pad_type_0 = const()[name = string("op_1210_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1210_strides_0 = const()[name = string("op_1210_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1210_pad_0 = const()[name = string("op_1210_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1210_dilations_0 = const()[name = string("op_1210_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1210_groups_0 = const()[name = string("op_1210_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 3072, 1, 1]> layers_3_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(100657472))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(101837184))))[name = string("layers_3_fc2_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_3_fc2_inlier_module_bias_to_fp16 = const()[name = string("layers_3_fc2_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(101837312)))];
+            tensor<fp16, [1, 768, 1, 1]> var_1210_cast_fp16 = conv(bias = layers_3_fc2_inlier_module_bias_to_fp16, dilations = var_1210_dilations_0, groups = var_1210_groups_0, pad = var_1210_pad_0, pad_type = var_1210_pad_type_0, strides = var_1210_strides_0, weight = layers_3_fc2_inlier_module_weight_to_fp16_palettized, x = input_39_cast_fp16)[name = string("op_1210_cast_fp16")];
+            string var_1216_pad_type_0 = const()[name = string("op_1216_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1216_strides_0 = const()[name = string("op_1216_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1216_pad_0 = const()[name = string("op_1216_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1216_dilations_0 = const()[name = string("op_1216_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1216_groups_0 = const()[name = string("op_1216_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 3072, 1, 1]> layers_3_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(101896448))), nonzero_data = tensor<fp16, [28718]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(101838912))))[name = string("layers_3_fc2_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1]> var_1216_cast_fp16 = conv(dilations = var_1216_dilations_0, groups = var_1216_groups_0, pad = var_1216_pad_0, pad_type = var_1216_pad_type_0, strides = var_1216_strides_0, weight = layers_3_fc2_outlier_module_weight_to_fp16_sparsified, x = input_39_cast_fp16)[name = string("op_1216_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> hidden_states_9_cast_fp16 = add(x = var_1210_cast_fp16, y = var_1216_cast_fp16)[name = string("hidden_states_9_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> inputs_25_cast_fp16 = add(x = inputs_23_cast_fp16, y = hidden_states_9_cast_fp16)[name = string("inputs_25_cast_fp16")];
+            tensor<int32, [4]> obj_89_begin_0 = const()[name = string("obj_89_begin_0"), val = tensor<int32, [4]>([4, 0, 0, 0])];
+            tensor<int32, [4]> obj_89_end_0 = const()[name = string("obj_89_end_0"), val = tensor<int32, [4]>([5, 768, 1, 1536])];
+            tensor<bool, [4]> obj_89_end_mask_0 = const()[name = string("obj_89_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<fp16, [1, 768, 1, 1536]> obj_89_cast_fp16 = slice_by_index(begin = obj_89_begin_0, end = obj_89_end_0, end_mask = obj_89_end_mask_0, x = read_state_2)[name = string("obj_89_cast_fp16")];
+            tensor<int32, [4]> obj_91_begin_0 = const()[name = string("obj_91_begin_0"), val = tensor<int32, [4]>([4, 0, 0, 0])];
+            tensor<int32, [4]> obj_91_end_0 = const()[name = string("obj_91_end_0"), val = tensor<int32, [4]>([5, 768, 1, 1536])];
+            tensor<bool, [4]> obj_91_end_mask_0 = const()[name = string("obj_91_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<fp16, [1, 768, 1, 1536]> obj_91_cast_fp16 = slice_by_index(begin = obj_91_begin_0, end = obj_91_end_0, end_mask = obj_91_end_mask_0, x = read_state_3)[name = string("obj_91_cast_fp16")];
+            int32 var_1238 = const()[name = string("op_1238"), val = int32(3)];
+            tensor<int32, [1]> out_25_axes_0 = const()[name = string("out_25_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_1263_to_fp16 = const()[name = string("op_1263_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> out_25_cast_fp16 = layer_norm(axes = out_25_axes_0, epsilon = var_1263_to_fp16, x = inputs_25_cast_fp16)[name = string("out_25_cast_fp16")];
+            tensor<fp16, [768]> obj_79_gamma_0_to_fp16 = const()[name = string("obj_79_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(102191424)))];
+            tensor<fp16, [768]> obj_79_beta_0_to_fp16 = const()[name = string("obj_79_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(102193024)))];
+            fp16 obj_79_epsilon_0_to_fp16 = const()[name = string("obj_79_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> obj_79_cast_fp16 = batch_norm(beta = obj_79_beta_0_to_fp16, epsilon = obj_79_epsilon_0_to_fp16, gamma = obj_79_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_25_cast_fp16)[name = string("obj_79_cast_fp16")];
+            string var_1285_pad_type_0 = const()[name = string("op_1285_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1285_strides_0 = const()[name = string("op_1285_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1285_pad_0 = const()[name = string("op_1285_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1285_dilations_0 = const()[name = string("op_1285_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1285_groups_0 = const()[name = string("op_1285_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_4_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(102194624))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(102489600))))[name = string("layers_4_self_attn_q_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_4_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_4_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(102489728)))];
+            tensor<fp16, [1, 768, 1, 1]> var_1285_cast_fp16 = conv(bias = layers_4_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_1285_dilations_0, groups = var_1285_groups_0, pad = var_1285_pad_0, pad_type = var_1285_pad_type_0, strides = var_1285_strides_0, weight = layers_4_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_79_cast_fp16)[name = string("op_1285_cast_fp16")];
+            string var_1291_pad_type_0 = const()[name = string("op_1291_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1291_strides_0 = const()[name = string("op_1291_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1291_pad_0 = const()[name = string("op_1291_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1291_dilations_0 = const()[name = string("op_1291_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1291_groups_0 = const()[name = string("op_1291_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_4_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(102501568))), nonzero_data = tensor<fp16, [5062]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(102491328))))[name = string("layers_4_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1]> var_1291_cast_fp16 = conv(dilations = var_1291_dilations_0, groups = var_1291_groups_0, pad = var_1291_pad_0, pad_type = var_1291_pad_type_0, strides = var_1291_strides_0, weight = layers_4_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_79_cast_fp16)[name = string("op_1291_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> query_17_cast_fp16 = add(x = var_1285_cast_fp16, y = var_1291_cast_fp16)[name = string("query_17_cast_fp16")];
+            string var_1300_pad_type_0 = const()[name = string("op_1300_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1300_strides_0 = const()[name = string("op_1300_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1300_pad_0 = const()[name = string("op_1300_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1300_dilations_0 = const()[name = string("op_1300_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1300_groups_0 = const()[name = string("op_1300_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_4_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(102575360))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(102870336))))[name = string("layers_4_self_attn_k_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1, 768, 1, 1]> var_1300_cast_fp16 = conv(dilations = var_1300_dilations_0, groups = var_1300_groups_0, pad = var_1300_pad_0, pad_type = var_1300_pad_type_0, strides = var_1300_strides_0, weight = layers_4_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_79_cast_fp16)[name = string("op_1300_cast_fp16")];
+            string var_1306_pad_type_0 = const()[name = string("op_1306_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1306_strides_0 = const()[name = string("op_1306_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1306_pad_0 = const()[name = string("op_1306_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1306_dilations_0 = const()[name = string("op_1306_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1306_groups_0 = const()[name = string("op_1306_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_4_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(102881536))), nonzero_data = tensor<fp16, [5478]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(102870464))))[name = string("layers_4_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1]> var_1306_cast_fp16 = conv(dilations = var_1306_dilations_0, groups = var_1306_groups_0, pad = var_1306_pad_0, pad_type = var_1306_pad_type_0, strides = var_1306_strides_0, weight = layers_4_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_79_cast_fp16)[name = string("op_1306_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> current_key_9_cast_fp16 = add(x = var_1300_cast_fp16, y = var_1306_cast_fp16)[name = string("current_key_9_cast_fp16")];
+            string var_1316_pad_type_0 = const()[name = string("op_1316_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1316_strides_0 = const()[name = string("op_1316_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1316_pad_0 = const()[name = string("op_1316_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1316_dilations_0 = const()[name = string("op_1316_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1316_groups_0 = const()[name = string("op_1316_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_4_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(102955328))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(103250304))))[name = string("layers_4_self_attn_v_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_4_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_4_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(103250432)))];
+            tensor<fp16, [1, 768, 1, 1]> var_1316_cast_fp16 = conv(bias = layers_4_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_1316_dilations_0, groups = var_1316_groups_0, pad = var_1316_pad_0, pad_type = var_1316_pad_type_0, strides = var_1316_strides_0, weight = layers_4_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_79_cast_fp16)[name = string("op_1316_cast_fp16")];
+            string var_1322_pad_type_0 = const()[name = string("op_1322_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1322_strides_0 = const()[name = string("op_1322_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1322_pad_0 = const()[name = string("op_1322_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1322_dilations_0 = const()[name = string("op_1322_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1322_groups_0 = const()[name = string("op_1322_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_4_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(103261824))), nonzero_data = tensor<fp16, [4851]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(103252032))))[name = string("layers_4_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1]> var_1322_cast_fp16 = conv(dilations = var_1322_dilations_0, groups = var_1322_groups_0, pad = var_1322_pad_0, pad_type = var_1322_pad_type_0, strides = var_1322_strides_0, weight = layers_4_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_79_cast_fp16)[name = string("op_1322_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> current_value_9_cast_fp16 = add(x = var_1316_cast_fp16, y = var_1322_cast_fp16)[name = string("current_value_9_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 448]> var_1328_cast_fp16 = mul(x = current_key_9_cast_fp16, y = var_202_cast_fp16)[name = string("op_1328_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 448]> key_9_cast_fp16 = add(x = var_71_cast_fp16_4, y = var_1328_cast_fp16)[name = string("key_9_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 448]> var_1330_cast_fp16 = mul(x = current_value_9_cast_fp16, y = var_202_cast_fp16)[name = string("op_1330_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 448]> value_9_cast_fp16 = add(x = var_86_cast_fp16_4, y = var_1330_cast_fp16)[name = string("value_9_cast_fp16")];
+            tensor<int32, [4]> var_1333 = const()[name = string("op_1333"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1]> mh_q_17_cast_fp16 = reshape(shape = var_1333, x = query_17_cast_fp16)[name = string("mh_q_17_cast_fp16")];
+            fp16 var_1335_to_fp16 = const()[name = string("op_1335_to_fp16"), val = fp16(0x1p-3)];
+            tensor<fp16, [1, 12, 64, 1]> var_1336_cast_fp16 = mul(x = mh_q_17_cast_fp16, y = var_1335_to_fp16)[name = string("op_1336_cast_fp16")];
+            tensor<int32, [4]> var_1337 = const()[name = string("op_1337"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 448]> var_1338_cast_fp16 = reshape(shape = var_1337, x = key_9_cast_fp16)[name = string("op_1338_cast_fp16")];
+            bool mh_w_33_transpose_x_0 = const()[name = string("mh_w_33_transpose_x_0"), val = bool(true)];
+            bool mh_w_33_transpose_y_0 = const()[name = string("mh_w_33_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 12, 1, 448]> mh_w_33_cast_fp16 = matmul(transpose_x = mh_w_33_transpose_x_0, transpose_y = mh_w_33_transpose_y_0, x = var_1336_cast_fp16, y = var_1338_cast_fp16)[name = string("mh_w_33_cast_fp16")];
+            tensor<fp16, [1, 12, 1, 448]> mh_w_35_cast_fp16 = add(x = mh_w_33_cast_fp16, y = var_219_cast_fp16)[name = string("mh_w_35_cast_fp16")];
+            tensor<fp16, [1, 12, 1, 448]> var_1346_cast_fp16 = softmax(axis = var_1238, x = mh_w_35_cast_fp16)[name = string("op_1346_cast_fp16")];
+            tensor<int32, [4]> var_1347 = const()[name = string("op_1347"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 448]> var_1348_cast_fp16 = reshape(shape = var_1347, x = value_9_cast_fp16)[name = string("op_1348_cast_fp16")];
+            bool attn_17_transpose_x_0 = const()[name = string("attn_17_transpose_x_0"), val = bool(false)];
+            bool attn_17_transpose_y_0 = const()[name = string("attn_17_transpose_y_0"), val = bool(true)];
+            tensor<fp16, [1, 12, 64, 1]> attn_17_cast_fp16 = matmul(transpose_x = attn_17_transpose_x_0, transpose_y = attn_17_transpose_y_0, x = var_1348_cast_fp16, y = var_1346_cast_fp16)[name = string("attn_17_cast_fp16")];
+            tensor<int32, [4]> var_1351 = const()[name = string("op_1351"), val = tensor<int32, [4]>([1, 768, 1, -1])];
+            tensor<fp16, [1, 768, 1, 1]> input_41_cast_fp16 = reshape(shape = var_1351, x = attn_17_cast_fp16)[name = string("input_41_cast_fp16")];
+            string var_1361_pad_type_0 = const()[name = string("op_1361_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1361_strides_0 = const()[name = string("op_1361_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1361_pad_0 = const()[name = string("op_1361_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1361_dilations_0 = const()[name = string("op_1361_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1361_groups_0 = const()[name = string("op_1361_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_4_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(103335616))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(103630592))))[name = string("layers_4_self_attn_o_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_4_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_4_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(103630720)))];
+            tensor<fp16, [1, 768, 1, 1]> var_1361_cast_fp16 = conv(bias = layers_4_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_1361_dilations_0, groups = var_1361_groups_0, pad = var_1361_pad_0, pad_type = var_1361_pad_type_0, strides = var_1361_strides_0, weight = layers_4_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_41_cast_fp16)[name = string("op_1361_cast_fp16")];
+            string var_1367_pad_type_0 = const()[name = string("op_1367_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1367_strides_0 = const()[name = string("op_1367_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1367_pad_0 = const()[name = string("op_1367_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1367_dilations_0 = const()[name = string("op_1367_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1367_groups_0 = const()[name = string("op_1367_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_4_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(103644992))), nonzero_data = tensor<fp16, [6288]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(103632320))))[name = string("layers_4_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1]> var_1367_cast_fp16 = conv(dilations = var_1367_dilations_0, groups = var_1367_groups_0, pad = var_1367_pad_0, pad_type = var_1367_pad_type_0, strides = var_1367_strides_0, weight = layers_4_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_41_cast_fp16)[name = string("op_1367_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> obj_85_cast_fp16 = add(x = var_1361_cast_fp16, y = var_1367_cast_fp16)[name = string("obj_85_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> inputs_27_cast_fp16 = add(x = inputs_25_cast_fp16, y = obj_85_cast_fp16)[name = string("inputs_27_cast_fp16")];
+            tensor<int32, [1]> out_27_axes_0 = const()[name = string("out_27_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_1382_to_fp16 = const()[name = string("op_1382_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> out_27_cast_fp16 = layer_norm(axes = out_27_axes_0, epsilon = var_1382_to_fp16, x = inputs_27_cast_fp16)[name = string("out_27_cast_fp16")];
+            tensor<fp16, [768]> obj_87_gamma_0_to_fp16 = const()[name = string("obj_87_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(103718784)))];
+            tensor<fp16, [768]> obj_87_beta_0_to_fp16 = const()[name = string("obj_87_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(103720384)))];
+            fp16 obj_87_epsilon_0_to_fp16 = const()[name = string("obj_87_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> obj_87_cast_fp16 = batch_norm(beta = obj_87_beta_0_to_fp16, epsilon = obj_87_epsilon_0_to_fp16, gamma = obj_87_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_27_cast_fp16)[name = string("obj_87_cast_fp16")];
+            string var_1402_pad_type_0 = const()[name = string("op_1402_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1402_strides_0 = const()[name = string("op_1402_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1402_pad_0 = const()[name = string("op_1402_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1402_dilations_0 = const()[name = string("op_1402_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1402_groups_0 = const()[name = string("op_1402_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_4_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(103721984))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(104016960))))[name = string("layers_4_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_4_encoder_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_4_encoder_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(104017088)))];
+            tensor<fp16, [1, 768, 1, 1]> var_1402_cast_fp16 = conv(bias = layers_4_encoder_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_1402_dilations_0, groups = var_1402_groups_0, pad = var_1402_pad_0, pad_type = var_1402_pad_type_0, strides = var_1402_strides_0, weight = layers_4_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_87_cast_fp16)[name = string("op_1402_cast_fp16")];
+            string var_1408_pad_type_0 = const()[name = string("op_1408_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1408_strides_0 = const()[name = string("op_1408_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1408_pad_0 = const()[name = string("op_1408_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1408_dilations_0 = const()[name = string("op_1408_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1408_groups_0 = const()[name = string("op_1408_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_4_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(104032192))), nonzero_data = tensor<fp16, [6708]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(104018688))))[name = string("layers_4_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1]> var_1408_cast_fp16 = conv(dilations = var_1408_dilations_0, groups = var_1408_groups_0, pad = var_1408_pad_0, pad_type = var_1408_pad_type_0, strides = var_1408_strides_0, weight = layers_4_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_87_cast_fp16)[name = string("op_1408_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> query_19_cast_fp16 = add(x = var_1402_cast_fp16, y = var_1408_cast_fp16)[name = string("query_19_cast_fp16")];
+            tensor<int32, [4]> var_1411 = const()[name = string("op_1411"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1]> mh_q_19_cast_fp16 = reshape(shape = var_1411, x = query_19_cast_fp16)[name = string("mh_q_19_cast_fp16")];
+            fp16 var_1413_to_fp16 = const()[name = string("op_1413_to_fp16"), val = fp16(0x1p-3)];
+            tensor<fp16, [1, 12, 64, 1]> var_1414_cast_fp16 = mul(x = mh_q_19_cast_fp16, y = var_1413_to_fp16)[name = string("op_1414_cast_fp16")];
+            tensor<int32, [4]> var_1415 = const()[name = string("op_1415"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1536]> var_1416_cast_fp16 = reshape(shape = var_1415, x = obj_89_cast_fp16)[name = string("op_1416_cast_fp16")];
+            bool mh_w_37_transpose_x_0 = const()[name = string("mh_w_37_transpose_x_0"), val = bool(true)];
+            bool mh_w_37_transpose_y_0 = const()[name = string("mh_w_37_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 12, 1, 1536]> mh_w_37_cast_fp16 = matmul(transpose_x = mh_w_37_transpose_x_0, transpose_y = mh_w_37_transpose_y_0, x = var_1414_cast_fp16, y = var_1416_cast_fp16)[name = string("mh_w_37_cast_fp16")];
+            tensor<fp16, [1, 12, 1, 1536]> mh_w_39_cast_fp16 = add(x = mh_w_37_cast_fp16, y = var_297_cast_fp16)[name = string("mh_w_39_cast_fp16")];
+            tensor<fp16, [1, 12, 1, 1536]> obj_95_cast_fp16 = softmax(axis = var_1238, x = mh_w_39_cast_fp16)[name = string("obj_95_cast_fp16")];
+            tensor<int32, [4]> var_1425 = const()[name = string("op_1425"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1536]> var_1426_cast_fp16 = reshape(shape = var_1425, x = obj_91_cast_fp16)[name = string("op_1426_cast_fp16")];
+            bool attn_19_transpose_x_0 = const()[name = string("attn_19_transpose_x_0"), val = bool(false)];
+            bool attn_19_transpose_y_0 = const()[name = string("attn_19_transpose_y_0"), val = bool(true)];
+            tensor<fp16, [1, 12, 64, 1]> attn_19_cast_fp16 = matmul(transpose_x = attn_19_transpose_x_0, transpose_y = attn_19_transpose_y_0, x = var_1426_cast_fp16, y = obj_95_cast_fp16)[name = string("attn_19_cast_fp16")];
+            tensor<int32, [4]> var_1429 = const()[name = string("op_1429"), val = tensor<int32, [4]>([1, 768, 1, -1])];
+            tensor<fp16, [1, 768, 1, 1]> input_43_cast_fp16 = reshape(shape = var_1429, x = attn_19_cast_fp16)[name = string("input_43_cast_fp16")];
+            string var_1439_pad_type_0 = const()[name = string("op_1439_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1439_strides_0 = const()[name = string("op_1439_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1439_pad_0 = const()[name = string("op_1439_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1439_dilations_0 = const()[name = string("op_1439_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1439_groups_0 = const()[name = string("op_1439_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_4_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(104105984))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(104400960))))[name = string("layers_4_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_4_encoder_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_4_encoder_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(104401088)))];
+            tensor<fp16, [1, 768, 1, 1]> var_1439_cast_fp16 = conv(bias = layers_4_encoder_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_1439_dilations_0, groups = var_1439_groups_0, pad = var_1439_pad_0, pad_type = var_1439_pad_type_0, strides = var_1439_strides_0, weight = layers_4_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_43_cast_fp16)[name = string("op_1439_cast_fp16")];
+            string var_1445_pad_type_0 = const()[name = string("op_1445_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1445_strides_0 = const()[name = string("op_1445_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1445_pad_0 = const()[name = string("op_1445_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1445_dilations_0 = const()[name = string("op_1445_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1445_groups_0 = const()[name = string("op_1445_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_4_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(104412544))), nonzero_data = tensor<fp16, [4875]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(104402688))))[name = string("layers_4_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1]> var_1445_cast_fp16 = conv(dilations = var_1445_dilations_0, groups = var_1445_groups_0, pad = var_1445_pad_0, pad_type = var_1445_pad_type_0, strides = var_1445_strides_0, weight = layers_4_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_43_cast_fp16)[name = string("op_1445_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> obj_93_cast_fp16 = add(x = var_1439_cast_fp16, y = var_1445_cast_fp16)[name = string("obj_93_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> inputs_29_cast_fp16 = add(x = inputs_27_cast_fp16, y = obj_93_cast_fp16)[name = string("inputs_29_cast_fp16")];
+            tensor<int32, [1]> out_29_axes_0 = const()[name = string("out_29_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_1456_to_fp16 = const()[name = string("op_1456_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> out_29_cast_fp16 = layer_norm(axes = out_29_axes_0, epsilon = var_1456_to_fp16, x = inputs_29_cast_fp16)[name = string("out_29_cast_fp16")];
+            tensor<fp16, [768]> input_45_gamma_0_to_fp16 = const()[name = string("input_45_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(104486336)))];
+            tensor<fp16, [768]> input_45_beta_0_to_fp16 = const()[name = string("input_45_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(104487936)))];
+            fp16 input_45_epsilon_0_to_fp16 = const()[name = string("input_45_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> input_45_cast_fp16 = batch_norm(beta = input_45_beta_0_to_fp16, epsilon = input_45_epsilon_0_to_fp16, gamma = input_45_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_29_cast_fp16)[name = string("input_45_cast_fp16")];
+            string var_1474_pad_type_0 = const()[name = string("op_1474_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1474_strides_0 = const()[name = string("op_1474_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1474_pad_0 = const()[name = string("op_1474_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1474_dilations_0 = const()[name = string("op_1474_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1474_groups_0 = const()[name = string("op_1474_groups_0"), val = int32(1)];
+            tensor<fp16, [3072, 768, 1, 1]> layers_4_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [3072, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(104489536))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(105669248))))[name = string("layers_4_fc1_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [3072]> layers_4_fc1_inlier_module_bias_to_fp16 = const()[name = string("layers_4_fc1_inlier_module_bias_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(105669376)))];
+            tensor<fp16, [1, 3072, 1, 1]> var_1474_cast_fp16 = conv(bias = layers_4_fc1_inlier_module_bias_to_fp16, dilations = var_1474_dilations_0, groups = var_1474_groups_0, pad = var_1474_pad_0, pad_type = var_1474_pad_type_0, strides = var_1474_strides_0, weight = layers_4_fc1_inlier_module_weight_to_fp16_palettized, x = input_45_cast_fp16)[name = string("op_1474_cast_fp16")];
+            string var_1480_pad_type_0 = const()[name = string("op_1480_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1480_strides_0 = const()[name = string("op_1480_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1480_pad_0 = const()[name = string("op_1480_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1480_dilations_0 = const()[name = string("op_1480_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1480_groups_0 = const()[name = string("op_1480_groups_0"), val = int32(1)];
+            tensor<fp16, [3072, 768, 1, 1]> layers_4_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [3072, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(105710592))), nonzero_data = tensor<fp16, [17454]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(105675584))))[name = string("layers_4_fc1_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 3072, 1, 1]> var_1480_cast_fp16 = conv(dilations = var_1480_dilations_0, groups = var_1480_groups_0, pad = var_1480_pad_0, pad_type = var_1480_pad_type_0, strides = var_1480_strides_0, weight = layers_4_fc1_outlier_module_weight_to_fp16_sparsified, x = input_45_cast_fp16)[name = string("op_1480_cast_fp16")];
+            tensor<fp16, [1, 3072, 1, 1]> input_47_cast_fp16 = add(x = var_1474_cast_fp16, y = var_1480_cast_fp16)[name = string("input_47_cast_fp16")];
+            string input_49_mode_0 = const()[name = string("input_49_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 3072, 1, 1]> input_49_cast_fp16 = gelu(mode = input_49_mode_0, x = input_47_cast_fp16)[name = string("input_49_cast_fp16")];
+            string var_1491_pad_type_0 = const()[name = string("op_1491_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1491_strides_0 = const()[name = string("op_1491_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1491_pad_0 = const()[name = string("op_1491_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1491_dilations_0 = const()[name = string("op_1491_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1491_groups_0 = const()[name = string("op_1491_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 3072, 1, 1]> layers_4_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(106005568))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(107185280))))[name = string("layers_4_fc2_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_4_fc2_inlier_module_bias_to_fp16 = const()[name = string("layers_4_fc2_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(107185408)))];
+            tensor<fp16, [1, 768, 1, 1]> var_1491_cast_fp16 = conv(bias = layers_4_fc2_inlier_module_bias_to_fp16, dilations = var_1491_dilations_0, groups = var_1491_groups_0, pad = var_1491_pad_0, pad_type = var_1491_pad_type_0, strides = var_1491_strides_0, weight = layers_4_fc2_inlier_module_weight_to_fp16_palettized, x = input_49_cast_fp16)[name = string("op_1491_cast_fp16")];
+            string var_1497_pad_type_0 = const()[name = string("op_1497_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1497_strides_0 = const()[name = string("op_1497_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1497_pad_0 = const()[name = string("op_1497_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1497_dilations_0 = const()[name = string("op_1497_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1497_groups_0 = const()[name = string("op_1497_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 3072, 1, 1]> layers_4_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(107218304))), nonzero_data = tensor<fp16, [15590]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(107187008))))[name = string("layers_4_fc2_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1]> var_1497_cast_fp16 = conv(dilations = var_1497_dilations_0, groups = var_1497_groups_0, pad = var_1497_pad_0, pad_type = var_1497_pad_type_0, strides = var_1497_strides_0, weight = layers_4_fc2_outlier_module_weight_to_fp16_sparsified, x = input_49_cast_fp16)[name = string("op_1497_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> hidden_states_11_cast_fp16 = add(x = var_1491_cast_fp16, y = var_1497_cast_fp16)[name = string("hidden_states_11_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> inputs_31_cast_fp16 = add(x = inputs_29_cast_fp16, y = hidden_states_11_cast_fp16)[name = string("inputs_31_cast_fp16")];
+            tensor<int32, [4]> obj_107_begin_0 = const()[name = string("obj_107_begin_0"), val = tensor<int32, [4]>([5, 0, 0, 0])];
+            tensor<int32, [4]> obj_107_end_0 = const()[name = string("obj_107_end_0"), val = tensor<int32, [4]>([6, 768, 1, 1536])];
+            tensor<bool, [4]> obj_107_end_mask_0 = const()[name = string("obj_107_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<fp16, [1, 768, 1, 1536]> obj_107_cast_fp16 = slice_by_index(begin = obj_107_begin_0, end = obj_107_end_0, end_mask = obj_107_end_mask_0, x = read_state_2)[name = string("obj_107_cast_fp16")];
+            tensor<int32, [4]> obj_109_begin_0 = const()[name = string("obj_109_begin_0"), val = tensor<int32, [4]>([5, 0, 0, 0])];
+            tensor<int32, [4]> obj_109_end_0 = const()[name = string("obj_109_end_0"), val = tensor<int32, [4]>([6, 768, 1, 1536])];
+            tensor<bool, [4]> obj_109_end_mask_0 = const()[name = string("obj_109_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<fp16, [1, 768, 1, 1536]> obj_109_cast_fp16 = slice_by_index(begin = obj_109_begin_0, end = obj_109_end_0, end_mask = obj_109_end_mask_0, x = read_state_3)[name = string("obj_109_cast_fp16")];
+            int32 var_1519 = const()[name = string("op_1519"), val = int32(3)];
+            tensor<int32, [1]> out_31_axes_0 = const()[name = string("out_31_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_1544_to_fp16 = const()[name = string("op_1544_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> out_31_cast_fp16 = layer_norm(axes = out_31_axes_0, epsilon = var_1544_to_fp16, x = inputs_31_cast_fp16)[name = string("out_31_cast_fp16")];
+            tensor<fp16, [768]> obj_97_gamma_0_to_fp16 = const()[name = string("obj_97_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(107513280)))];
+            tensor<fp16, [768]> obj_97_beta_0_to_fp16 = const()[name = string("obj_97_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(107514880)))];
+            fp16 obj_97_epsilon_0_to_fp16 = const()[name = string("obj_97_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> obj_97_cast_fp16 = batch_norm(beta = obj_97_beta_0_to_fp16, epsilon = obj_97_epsilon_0_to_fp16, gamma = obj_97_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_31_cast_fp16)[name = string("obj_97_cast_fp16")];
+            string var_1566_pad_type_0 = const()[name = string("op_1566_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1566_strides_0 = const()[name = string("op_1566_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1566_pad_0 = const()[name = string("op_1566_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1566_dilations_0 = const()[name = string("op_1566_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1566_groups_0 = const()[name = string("op_1566_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_5_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(107516480))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(107811456))))[name = string("layers_5_self_attn_q_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_5_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_5_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(107811584)))];
+            tensor<fp16, [1, 768, 1, 1]> var_1566_cast_fp16 = conv(bias = layers_5_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_1566_dilations_0, groups = var_1566_groups_0, pad = var_1566_pad_0, pad_type = var_1566_pad_type_0, strides = var_1566_strides_0, weight = layers_5_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_97_cast_fp16)[name = string("op_1566_cast_fp16")];
+            string var_1572_pad_type_0 = const()[name = string("op_1572_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1572_strides_0 = const()[name = string("op_1572_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1572_pad_0 = const()[name = string("op_1572_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1572_dilations_0 = const()[name = string("op_1572_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1572_groups_0 = const()[name = string("op_1572_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_5_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(107822336))), nonzero_data = tensor<fp16, [4516]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(107813184))))[name = string("layers_5_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1]> var_1572_cast_fp16 = conv(dilations = var_1572_dilations_0, groups = var_1572_groups_0, pad = var_1572_pad_0, pad_type = var_1572_pad_type_0, strides = var_1572_strides_0, weight = layers_5_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_97_cast_fp16)[name = string("op_1572_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> query_21_cast_fp16 = add(x = var_1566_cast_fp16, y = var_1572_cast_fp16)[name = string("query_21_cast_fp16")];
+            string var_1581_pad_type_0 = const()[name = string("op_1581_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1581_strides_0 = const()[name = string("op_1581_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1581_pad_0 = const()[name = string("op_1581_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1581_dilations_0 = const()[name = string("op_1581_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1581_groups_0 = const()[name = string("op_1581_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_5_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(107896128))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(108191104))))[name = string("layers_5_self_attn_k_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1, 768, 1, 1]> var_1581_cast_fp16 = conv(dilations = var_1581_dilations_0, groups = var_1581_groups_0, pad = var_1581_pad_0, pad_type = var_1581_pad_type_0, strides = var_1581_strides_0, weight = layers_5_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_97_cast_fp16)[name = string("op_1581_cast_fp16")];
+            string var_1587_pad_type_0 = const()[name = string("op_1587_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1587_strides_0 = const()[name = string("op_1587_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1587_pad_0 = const()[name = string("op_1587_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1587_dilations_0 = const()[name = string("op_1587_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1587_groups_0 = const()[name = string("op_1587_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_5_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(108200576))), nonzero_data = tensor<fp16, [4633]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(108191232))))[name = string("layers_5_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1]> var_1587_cast_fp16 = conv(dilations = var_1587_dilations_0, groups = var_1587_groups_0, pad = var_1587_pad_0, pad_type = var_1587_pad_type_0, strides = var_1587_strides_0, weight = layers_5_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_97_cast_fp16)[name = string("op_1587_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> current_key_11_cast_fp16 = add(x = var_1581_cast_fp16, y = var_1587_cast_fp16)[name = string("current_key_11_cast_fp16")];
+            string var_1597_pad_type_0 = const()[name = string("op_1597_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1597_strides_0 = const()[name = string("op_1597_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1597_pad_0 = const()[name = string("op_1597_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1597_dilations_0 = const()[name = string("op_1597_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1597_groups_0 = const()[name = string("op_1597_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_5_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(108274368))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(108569344))))[name = string("layers_5_self_attn_v_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_5_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_5_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(108569472)))];
+            tensor<fp16, [1, 768, 1, 1]> var_1597_cast_fp16 = conv(bias = layers_5_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_1597_dilations_0, groups = var_1597_groups_0, pad = var_1597_pad_0, pad_type = var_1597_pad_type_0, strides = var_1597_strides_0, weight = layers_5_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_97_cast_fp16)[name = string("op_1597_cast_fp16")];
+            string var_1603_pad_type_0 = const()[name = string("op_1603_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1603_strides_0 = const()[name = string("op_1603_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1603_pad_0 = const()[name = string("op_1603_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1603_dilations_0 = const()[name = string("op_1603_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1603_groups_0 = const()[name = string("op_1603_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_5_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(108577024))), nonzero_data = tensor<fp16, [2916]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(108571072))))[name = string("layers_5_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1]> var_1603_cast_fp16 = conv(dilations = var_1603_dilations_0, groups = var_1603_groups_0, pad = var_1603_pad_0, pad_type = var_1603_pad_type_0, strides = var_1603_strides_0, weight = layers_5_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_97_cast_fp16)[name = string("op_1603_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> current_value_11_cast_fp16 = add(x = var_1597_cast_fp16, y = var_1603_cast_fp16)[name = string("current_value_11_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 448]> var_1609_cast_fp16 = mul(x = current_key_11_cast_fp16, y = var_202_cast_fp16)[name = string("op_1609_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 448]> key_11_cast_fp16 = add(x = var_71_cast_fp16_5, y = var_1609_cast_fp16)[name = string("key_11_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 448]> var_1611_cast_fp16 = mul(x = current_value_11_cast_fp16, y = var_202_cast_fp16)[name = string("op_1611_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 448]> value_11_cast_fp16 = add(x = var_86_cast_fp16_5, y = var_1611_cast_fp16)[name = string("value_11_cast_fp16")];
+            tensor<int32, [4]> var_1614 = const()[name = string("op_1614"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1]> mh_q_21_cast_fp16 = reshape(shape = var_1614, x = query_21_cast_fp16)[name = string("mh_q_21_cast_fp16")];
+            fp16 var_1616_to_fp16 = const()[name = string("op_1616_to_fp16"), val = fp16(0x1p-3)];
+            tensor<fp16, [1, 12, 64, 1]> var_1617_cast_fp16 = mul(x = mh_q_21_cast_fp16, y = var_1616_to_fp16)[name = string("op_1617_cast_fp16")];
+            tensor<int32, [4]> var_1618 = const()[name = string("op_1618"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 448]> var_1619_cast_fp16 = reshape(shape = var_1618, x = key_11_cast_fp16)[name = string("op_1619_cast_fp16")];
+            bool mh_w_41_transpose_x_0 = const()[name = string("mh_w_41_transpose_x_0"), val = bool(true)];
+            bool mh_w_41_transpose_y_0 = const()[name = string("mh_w_41_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 12, 1, 448]> mh_w_41_cast_fp16 = matmul(transpose_x = mh_w_41_transpose_x_0, transpose_y = mh_w_41_transpose_y_0, x = var_1617_cast_fp16, y = var_1619_cast_fp16)[name = string("mh_w_41_cast_fp16")];
+            tensor<fp16, [1, 12, 1, 448]> mh_w_43_cast_fp16 = add(x = mh_w_41_cast_fp16, y = var_219_cast_fp16)[name = string("mh_w_43_cast_fp16")];
+            tensor<fp16, [1, 12, 1, 448]> var_1627_cast_fp16 = softmax(axis = var_1519, x = mh_w_43_cast_fp16)[name = string("op_1627_cast_fp16")];
+            tensor<int32, [4]> var_1628 = const()[name = string("op_1628"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 448]> var_1629_cast_fp16 = reshape(shape = var_1628, x = value_11_cast_fp16)[name = string("op_1629_cast_fp16")];
+            bool attn_21_transpose_x_0 = const()[name = string("attn_21_transpose_x_0"), val = bool(false)];
+            bool attn_21_transpose_y_0 = const()[name = string("attn_21_transpose_y_0"), val = bool(true)];
+            tensor<fp16, [1, 12, 64, 1]> attn_21_cast_fp16 = matmul(transpose_x = attn_21_transpose_x_0, transpose_y = attn_21_transpose_y_0, x = var_1629_cast_fp16, y = var_1627_cast_fp16)[name = string("attn_21_cast_fp16")];
+            tensor<int32, [4]> var_1632 = const()[name = string("op_1632"), val = tensor<int32, [4]>([1, 768, 1, -1])];
+            tensor<fp16, [1, 768, 1, 1]> input_51_cast_fp16 = reshape(shape = var_1632, x = attn_21_cast_fp16)[name = string("input_51_cast_fp16")];
+            string var_1642_pad_type_0 = const()[name = string("op_1642_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1642_strides_0 = const()[name = string("op_1642_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1642_pad_0 = const()[name = string("op_1642_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1642_dilations_0 = const()[name = string("op_1642_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1642_groups_0 = const()[name = string("op_1642_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_5_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(108650816))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(108945792))))[name = string("layers_5_self_attn_o_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_5_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_5_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(108945920)))];
+            tensor<fp16, [1, 768, 1, 1]> var_1642_cast_fp16 = conv(bias = layers_5_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_1642_dilations_0, groups = var_1642_groups_0, pad = var_1642_pad_0, pad_type = var_1642_pad_type_0, strides = var_1642_strides_0, weight = layers_5_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_51_cast_fp16)[name = string("op_1642_cast_fp16")];
+            string var_1648_pad_type_0 = const()[name = string("op_1648_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1648_strides_0 = const()[name = string("op_1648_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1648_pad_0 = const()[name = string("op_1648_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1648_dilations_0 = const()[name = string("op_1648_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1648_groups_0 = const()[name = string("op_1648_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_5_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(108953920))), nonzero_data = tensor<fp16, [3145]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(108947520))))[name = string("layers_5_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1]> var_1648_cast_fp16 = conv(dilations = var_1648_dilations_0, groups = var_1648_groups_0, pad = var_1648_pad_0, pad_type = var_1648_pad_type_0, strides = var_1648_strides_0, weight = layers_5_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_51_cast_fp16)[name = string("op_1648_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> obj_103_cast_fp16 = add(x = var_1642_cast_fp16, y = var_1648_cast_fp16)[name = string("obj_103_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> inputs_33_cast_fp16 = add(x = inputs_31_cast_fp16, y = obj_103_cast_fp16)[name = string("inputs_33_cast_fp16")];
+            tensor<int32, [1]> out_33_axes_0 = const()[name = string("out_33_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_1663_to_fp16 = const()[name = string("op_1663_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> out_33_cast_fp16 = layer_norm(axes = out_33_axes_0, epsilon = var_1663_to_fp16, x = inputs_33_cast_fp16)[name = string("out_33_cast_fp16")];
+            tensor<fp16, [768]> obj_105_gamma_0_to_fp16 = const()[name = string("obj_105_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(109027712)))];
+            tensor<fp16, [768]> obj_105_beta_0_to_fp16 = const()[name = string("obj_105_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(109029312)))];
+            fp16 obj_105_epsilon_0_to_fp16 = const()[name = string("obj_105_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> obj_105_cast_fp16 = batch_norm(beta = obj_105_beta_0_to_fp16, epsilon = obj_105_epsilon_0_to_fp16, gamma = obj_105_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_33_cast_fp16)[name = string("obj_105_cast_fp16")];
+            string var_1683_pad_type_0 = const()[name = string("op_1683_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1683_strides_0 = const()[name = string("op_1683_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1683_pad_0 = const()[name = string("op_1683_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1683_dilations_0 = const()[name = string("op_1683_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1683_groups_0 = const()[name = string("op_1683_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_5_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(109030912))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(109325888))))[name = string("layers_5_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_5_encoder_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_5_encoder_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(109326016)))];
+            tensor<fp16, [1, 768, 1, 1]> var_1683_cast_fp16 = conv(bias = layers_5_encoder_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_1683_dilations_0, groups = var_1683_groups_0, pad = var_1683_pad_0, pad_type = var_1683_pad_type_0, strides = var_1683_strides_0, weight = layers_5_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_105_cast_fp16)[name = string("op_1683_cast_fp16")];
+            string var_1689_pad_type_0 = const()[name = string("op_1689_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1689_strides_0 = const()[name = string("op_1689_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1689_pad_0 = const()[name = string("op_1689_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1689_dilations_0 = const()[name = string("op_1689_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1689_groups_0 = const()[name = string("op_1689_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_5_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(109336960))), nonzero_data = tensor<fp16, [4626]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(109327616))))[name = string("layers_5_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1]> var_1689_cast_fp16 = conv(dilations = var_1689_dilations_0, groups = var_1689_groups_0, pad = var_1689_pad_0, pad_type = var_1689_pad_type_0, strides = var_1689_strides_0, weight = layers_5_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_105_cast_fp16)[name = string("op_1689_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> query_23_cast_fp16 = add(x = var_1683_cast_fp16, y = var_1689_cast_fp16)[name = string("query_23_cast_fp16")];
+            tensor<int32, [4]> var_1692 = const()[name = string("op_1692"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1]> mh_q_23_cast_fp16 = reshape(shape = var_1692, x = query_23_cast_fp16)[name = string("mh_q_23_cast_fp16")];
+            fp16 var_1694_to_fp16 = const()[name = string("op_1694_to_fp16"), val = fp16(0x1p-3)];
+            tensor<fp16, [1, 12, 64, 1]> var_1695_cast_fp16 = mul(x = mh_q_23_cast_fp16, y = var_1694_to_fp16)[name = string("op_1695_cast_fp16")];
+            tensor<int32, [4]> var_1696 = const()[name = string("op_1696"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1536]> var_1697_cast_fp16 = reshape(shape = var_1696, x = obj_107_cast_fp16)[name = string("op_1697_cast_fp16")];
+            bool mh_w_45_transpose_x_0 = const()[name = string("mh_w_45_transpose_x_0"), val = bool(true)];
+            bool mh_w_45_transpose_y_0 = const()[name = string("mh_w_45_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 12, 1, 1536]> mh_w_45_cast_fp16 = matmul(transpose_x = mh_w_45_transpose_x_0, transpose_y = mh_w_45_transpose_y_0, x = var_1695_cast_fp16, y = var_1697_cast_fp16)[name = string("mh_w_45_cast_fp16")];
+            tensor<fp16, [1, 12, 1, 1536]> mh_w_47_cast_fp16 = add(x = mh_w_45_cast_fp16, y = var_297_cast_fp16)[name = string("mh_w_47_cast_fp16")];
+            tensor<fp16, [1, 12, 1, 1536]> obj_113_cast_fp16 = softmax(axis = var_1519, x = mh_w_47_cast_fp16)[name = string("obj_113_cast_fp16")];
+            tensor<int32, [4]> var_1706 = const()[name = string("op_1706"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1536]> var_1707_cast_fp16 = reshape(shape = var_1706, x = obj_109_cast_fp16)[name = string("op_1707_cast_fp16")];
+            bool attn_23_transpose_x_0 = const()[name = string("attn_23_transpose_x_0"), val = bool(false)];
+            bool attn_23_transpose_y_0 = const()[name = string("attn_23_transpose_y_0"), val = bool(true)];
+            tensor<fp16, [1, 12, 64, 1]> attn_23_cast_fp16 = matmul(transpose_x = attn_23_transpose_x_0, transpose_y = attn_23_transpose_y_0, x = var_1707_cast_fp16, y = obj_113_cast_fp16)[name = string("attn_23_cast_fp16")];
+            tensor<int32, [4]> var_1710 = const()[name = string("op_1710"), val = tensor<int32, [4]>([1, 768, 1, -1])];
+            tensor<fp16, [1, 768, 1, 1]> input_53_cast_fp16 = reshape(shape = var_1710, x = attn_23_cast_fp16)[name = string("input_53_cast_fp16")];
+            string var_1720_pad_type_0 = const()[name = string("op_1720_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1720_strides_0 = const()[name = string("op_1720_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1720_pad_0 = const()[name = string("op_1720_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1720_dilations_0 = const()[name = string("op_1720_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1720_groups_0 = const()[name = string("op_1720_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_5_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(109410752))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(109705728))))[name = string("layers_5_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_5_encoder_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_5_encoder_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(109705856)))];
+            tensor<fp16, [1, 768, 1, 1]> var_1720_cast_fp16 = conv(bias = layers_5_encoder_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_1720_dilations_0, groups = var_1720_groups_0, pad = var_1720_pad_0, pad_type = var_1720_pad_type_0, strides = var_1720_strides_0, weight = layers_5_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_53_cast_fp16)[name = string("op_1720_cast_fp16")];
+            string var_1726_pad_type_0 = const()[name = string("op_1726_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1726_strides_0 = const()[name = string("op_1726_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1726_pad_0 = const()[name = string("op_1726_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1726_dilations_0 = const()[name = string("op_1726_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1726_groups_0 = const()[name = string("op_1726_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_5_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(109714432))), nonzero_data = tensor<fp16, [3451]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(109707456))))[name = string("layers_5_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1]> var_1726_cast_fp16 = conv(dilations = var_1726_dilations_0, groups = var_1726_groups_0, pad = var_1726_pad_0, pad_type = var_1726_pad_type_0, strides = var_1726_strides_0, weight = layers_5_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_53_cast_fp16)[name = string("op_1726_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> obj_111_cast_fp16 = add(x = var_1720_cast_fp16, y = var_1726_cast_fp16)[name = string("obj_111_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> inputs_35_cast_fp16 = add(x = inputs_33_cast_fp16, y = obj_111_cast_fp16)[name = string("inputs_35_cast_fp16")];
+            tensor<int32, [1]> out_35_axes_0 = const()[name = string("out_35_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_1737_to_fp16 = const()[name = string("op_1737_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> out_35_cast_fp16 = layer_norm(axes = out_35_axes_0, epsilon = var_1737_to_fp16, x = inputs_35_cast_fp16)[name = string("out_35_cast_fp16")];
+            tensor<fp16, [768]> input_55_gamma_0_to_fp16 = const()[name = string("input_55_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(109788224)))];
+            tensor<fp16, [768]> input_55_beta_0_to_fp16 = const()[name = string("input_55_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(109789824)))];
+            fp16 input_55_epsilon_0_to_fp16 = const()[name = string("input_55_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> input_55_cast_fp16 = batch_norm(beta = input_55_beta_0_to_fp16, epsilon = input_55_epsilon_0_to_fp16, gamma = input_55_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_35_cast_fp16)[name = string("input_55_cast_fp16")];
+            string var_1755_pad_type_0 = const()[name = string("op_1755_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1755_strides_0 = const()[name = string("op_1755_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1755_pad_0 = const()[name = string("op_1755_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1755_dilations_0 = const()[name = string("op_1755_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1755_groups_0 = const()[name = string("op_1755_groups_0"), val = int32(1)];
+            tensor<fp16, [3072, 768, 1, 1]> layers_5_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [3072, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(109791424))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(110971136))))[name = string("layers_5_fc1_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [3072]> layers_5_fc1_inlier_module_bias_to_fp16 = const()[name = string("layers_5_fc1_inlier_module_bias_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(110971264)))];
+            tensor<fp16, [1, 3072, 1, 1]> var_1755_cast_fp16 = conv(bias = layers_5_fc1_inlier_module_bias_to_fp16, dilations = var_1755_dilations_0, groups = var_1755_groups_0, pad = var_1755_pad_0, pad_type = var_1755_pad_type_0, strides = var_1755_strides_0, weight = layers_5_fc1_inlier_module_weight_to_fp16_palettized, x = input_55_cast_fp16)[name = string("op_1755_cast_fp16")];
+            string var_1761_pad_type_0 = const()[name = string("op_1761_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1761_strides_0 = const()[name = string("op_1761_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1761_pad_0 = const()[name = string("op_1761_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1761_dilations_0 = const()[name = string("op_1761_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1761_groups_0 = const()[name = string("op_1761_groups_0"), val = int32(1)];
+            tensor<fp16, [3072, 768, 1, 1]> layers_5_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [3072, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(110998848))), nonzero_data = tensor<fp16, [10651]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(110977472))))[name = string("layers_5_fc1_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 3072, 1, 1]> var_1761_cast_fp16 = conv(dilations = var_1761_dilations_0, groups = var_1761_groups_0, pad = var_1761_pad_0, pad_type = var_1761_pad_type_0, strides = var_1761_strides_0, weight = layers_5_fc1_outlier_module_weight_to_fp16_sparsified, x = input_55_cast_fp16)[name = string("op_1761_cast_fp16")];
+            tensor<fp16, [1, 3072, 1, 1]> input_57_cast_fp16 = add(x = var_1755_cast_fp16, y = var_1761_cast_fp16)[name = string("input_57_cast_fp16")];
+            string input_59_mode_0 = const()[name = string("input_59_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 3072, 1, 1]> input_59_cast_fp16 = gelu(mode = input_59_mode_0, x = input_57_cast_fp16)[name = string("input_59_cast_fp16")];
+            string var_1772_pad_type_0 = const()[name = string("op_1772_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1772_strides_0 = const()[name = string("op_1772_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1772_pad_0 = const()[name = string("op_1772_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1772_dilations_0 = const()[name = string("op_1772_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1772_groups_0 = const()[name = string("op_1772_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 3072, 1, 1]> layers_5_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(111293824))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(112473536))))[name = string("layers_5_fc2_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_5_fc2_inlier_module_bias_to_fp16 = const()[name = string("layers_5_fc2_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(112473664)))];
+            tensor<fp16, [1, 768, 1, 1]> var_1772_cast_fp16 = conv(bias = layers_5_fc2_inlier_module_bias_to_fp16, dilations = var_1772_dilations_0, groups = var_1772_groups_0, pad = var_1772_pad_0, pad_type = var_1772_pad_type_0, strides = var_1772_strides_0, weight = layers_5_fc2_inlier_module_weight_to_fp16_palettized, x = input_59_cast_fp16)[name = string("op_1772_cast_fp16")];
+            string var_1778_pad_type_0 = const()[name = string("op_1778_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1778_strides_0 = const()[name = string("op_1778_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1778_pad_0 = const()[name = string("op_1778_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1778_dilations_0 = const()[name = string("op_1778_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1778_groups_0 = const()[name = string("op_1778_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 3072, 1, 1]> layers_5_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(112497472))), nonzero_data = tensor<fp16, [11064]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(112475264))))[name = string("layers_5_fc2_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1]> var_1778_cast_fp16 = conv(dilations = var_1778_dilations_0, groups = var_1778_groups_0, pad = var_1778_pad_0, pad_type = var_1778_pad_type_0, strides = var_1778_strides_0, weight = layers_5_fc2_outlier_module_weight_to_fp16_sparsified, x = input_59_cast_fp16)[name = string("op_1778_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> hidden_states_13_cast_fp16 = add(x = var_1772_cast_fp16, y = var_1778_cast_fp16)[name = string("hidden_states_13_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> inputs_37_cast_fp16 = add(x = inputs_35_cast_fp16, y = hidden_states_13_cast_fp16)[name = string("inputs_37_cast_fp16")];
+            tensor<int32, [4]> obj_125_begin_0 = const()[name = string("obj_125_begin_0"), val = tensor<int32, [4]>([6, 0, 0, 0])];
+            tensor<int32, [4]> obj_125_end_0 = const()[name = string("obj_125_end_0"), val = tensor<int32, [4]>([7, 768, 1, 1536])];
+            tensor<bool, [4]> obj_125_end_mask_0 = const()[name = string("obj_125_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<fp16, [1, 768, 1, 1536]> obj_125_cast_fp16 = slice_by_index(begin = obj_125_begin_0, end = obj_125_end_0, end_mask = obj_125_end_mask_0, x = read_state_2)[name = string("obj_125_cast_fp16")];
+            tensor<int32, [4]> obj_127_begin_0 = const()[name = string("obj_127_begin_0"), val = tensor<int32, [4]>([6, 0, 0, 0])];
+            tensor<int32, [4]> obj_127_end_0 = const()[name = string("obj_127_end_0"), val = tensor<int32, [4]>([7, 768, 1, 1536])];
+            tensor<bool, [4]> obj_127_end_mask_0 = const()[name = string("obj_127_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<fp16, [1, 768, 1, 1536]> obj_127_cast_fp16 = slice_by_index(begin = obj_127_begin_0, end = obj_127_end_0, end_mask = obj_127_end_mask_0, x = read_state_3)[name = string("obj_127_cast_fp16")];
+            int32 var_1800 = const()[name = string("op_1800"), val = int32(3)];
+            tensor<int32, [1]> out_37_axes_0 = const()[name = string("out_37_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_1825_to_fp16 = const()[name = string("op_1825_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> out_37_cast_fp16 = layer_norm(axes = out_37_axes_0, epsilon = var_1825_to_fp16, x = inputs_37_cast_fp16)[name = string("out_37_cast_fp16")];
+            tensor<fp16, [768]> obj_115_gamma_0_to_fp16 = const()[name = string("obj_115_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(112792448)))];
+            tensor<fp16, [768]> obj_115_beta_0_to_fp16 = const()[name = string("obj_115_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(112794048)))];
+            fp16 obj_115_epsilon_0_to_fp16 = const()[name = string("obj_115_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> obj_115_cast_fp16 = batch_norm(beta = obj_115_beta_0_to_fp16, epsilon = obj_115_epsilon_0_to_fp16, gamma = obj_115_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_37_cast_fp16)[name = string("obj_115_cast_fp16")];
+            string var_1847_pad_type_0 = const()[name = string("op_1847_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1847_strides_0 = const()[name = string("op_1847_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1847_pad_0 = const()[name = string("op_1847_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1847_dilations_0 = const()[name = string("op_1847_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1847_groups_0 = const()[name = string("op_1847_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_6_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(112795648))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(113090624))))[name = string("layers_6_self_attn_q_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_6_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_6_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(113090752)))];
+            tensor<fp16, [1, 768, 1, 1]> var_1847_cast_fp16 = conv(bias = layers_6_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_1847_dilations_0, groups = var_1847_groups_0, pad = var_1847_pad_0, pad_type = var_1847_pad_type_0, strides = var_1847_strides_0, weight = layers_6_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_115_cast_fp16)[name = string("op_1847_cast_fp16")];
+            string var_1853_pad_type_0 = const()[name = string("op_1853_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1853_strides_0 = const()[name = string("op_1853_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1853_pad_0 = const()[name = string("op_1853_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1853_dilations_0 = const()[name = string("op_1853_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1853_groups_0 = const()[name = string("op_1853_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_6_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(113100672))), nonzero_data = tensor<fp16, [4098]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(113092352))))[name = string("layers_6_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1]> var_1853_cast_fp16 = conv(dilations = var_1853_dilations_0, groups = var_1853_groups_0, pad = var_1853_pad_0, pad_type = var_1853_pad_type_0, strides = var_1853_strides_0, weight = layers_6_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_115_cast_fp16)[name = string("op_1853_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> query_25_cast_fp16 = add(x = var_1847_cast_fp16, y = var_1853_cast_fp16)[name = string("query_25_cast_fp16")];
+            string var_1862_pad_type_0 = const()[name = string("op_1862_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1862_strides_0 = const()[name = string("op_1862_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1862_pad_0 = const()[name = string("op_1862_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1862_dilations_0 = const()[name = string("op_1862_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1862_groups_0 = const()[name = string("op_1862_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_6_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(113174464))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(113469440))))[name = string("layers_6_self_attn_k_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1, 768, 1, 1]> var_1862_cast_fp16 = conv(dilations = var_1862_dilations_0, groups = var_1862_groups_0, pad = var_1862_pad_0, pad_type = var_1862_pad_type_0, strides = var_1862_strides_0, weight = layers_6_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_115_cast_fp16)[name = string("op_1862_cast_fp16")];
+            string var_1868_pad_type_0 = const()[name = string("op_1868_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1868_strides_0 = const()[name = string("op_1868_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1868_pad_0 = const()[name = string("op_1868_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1868_dilations_0 = const()[name = string("op_1868_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1868_groups_0 = const()[name = string("op_1868_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_6_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(113478080))), nonzero_data = tensor<fp16, [4200]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(113469568))))[name = string("layers_6_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1]> var_1868_cast_fp16 = conv(dilations = var_1868_dilations_0, groups = var_1868_groups_0, pad = var_1868_pad_0, pad_type = var_1868_pad_type_0, strides = var_1868_strides_0, weight = layers_6_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_115_cast_fp16)[name = string("op_1868_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> current_key_13_cast_fp16 = add(x = var_1862_cast_fp16, y = var_1868_cast_fp16)[name = string("current_key_13_cast_fp16")];
+            string var_1878_pad_type_0 = const()[name = string("op_1878_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1878_strides_0 = const()[name = string("op_1878_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1878_pad_0 = const()[name = string("op_1878_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1878_dilations_0 = const()[name = string("op_1878_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1878_groups_0 = const()[name = string("op_1878_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_6_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(113551872))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(113846848))))[name = string("layers_6_self_attn_v_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_6_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_6_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(113846976)))];
+            tensor<fp16, [1, 768, 1, 1]> var_1878_cast_fp16 = conv(bias = layers_6_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_1878_dilations_0, groups = var_1878_groups_0, pad = var_1878_pad_0, pad_type = var_1878_pad_type_0, strides = var_1878_strides_0, weight = layers_6_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_115_cast_fp16)[name = string("op_1878_cast_fp16")];
+            string var_1884_pad_type_0 = const()[name = string("op_1884_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1884_strides_0 = const()[name = string("op_1884_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1884_pad_0 = const()[name = string("op_1884_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1884_dilations_0 = const()[name = string("op_1884_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1884_groups_0 = const()[name = string("op_1884_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_6_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(113853824))), nonzero_data = tensor<fp16, [2577]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(113848576))))[name = string("layers_6_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1]> var_1884_cast_fp16 = conv(dilations = var_1884_dilations_0, groups = var_1884_groups_0, pad = var_1884_pad_0, pad_type = var_1884_pad_type_0, strides = var_1884_strides_0, weight = layers_6_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_115_cast_fp16)[name = string("op_1884_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> current_value_13_cast_fp16 = add(x = var_1878_cast_fp16, y = var_1884_cast_fp16)[name = string("current_value_13_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 448]> var_1890_cast_fp16 = mul(x = current_key_13_cast_fp16, y = var_202_cast_fp16)[name = string("op_1890_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 448]> key_13_cast_fp16 = add(x = var_71_cast_fp16_6, y = var_1890_cast_fp16)[name = string("key_13_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 448]> var_1892_cast_fp16 = mul(x = current_value_13_cast_fp16, y = var_202_cast_fp16)[name = string("op_1892_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 448]> value_13_cast_fp16 = add(x = var_86_cast_fp16_6, y = var_1892_cast_fp16)[name = string("value_13_cast_fp16")];
+            tensor<int32, [4]> var_1895 = const()[name = string("op_1895"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1]> mh_q_25_cast_fp16 = reshape(shape = var_1895, x = query_25_cast_fp16)[name = string("mh_q_25_cast_fp16")];
+            fp16 var_1897_to_fp16 = const()[name = string("op_1897_to_fp16"), val = fp16(0x1p-3)];
+            tensor<fp16, [1, 12, 64, 1]> var_1898_cast_fp16 = mul(x = mh_q_25_cast_fp16, y = var_1897_to_fp16)[name = string("op_1898_cast_fp16")];
+            tensor<int32, [4]> var_1899 = const()[name = string("op_1899"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 448]> var_1900_cast_fp16 = reshape(shape = var_1899, x = key_13_cast_fp16)[name = string("op_1900_cast_fp16")];
+            bool mh_w_49_transpose_x_0 = const()[name = string("mh_w_49_transpose_x_0"), val = bool(true)];
+            bool mh_w_49_transpose_y_0 = const()[name = string("mh_w_49_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 12, 1, 448]> mh_w_49_cast_fp16 = matmul(transpose_x = mh_w_49_transpose_x_0, transpose_y = mh_w_49_transpose_y_0, x = var_1898_cast_fp16, y = var_1900_cast_fp16)[name = string("mh_w_49_cast_fp16")];
+            tensor<fp16, [1, 12, 1, 448]> mh_w_51_cast_fp16 = add(x = mh_w_49_cast_fp16, y = var_219_cast_fp16)[name = string("mh_w_51_cast_fp16")];
+            tensor<fp16, [1, 12, 1, 448]> var_1908_cast_fp16 = softmax(axis = var_1800, x = mh_w_51_cast_fp16)[name = string("op_1908_cast_fp16")];
+            tensor<int32, [4]> var_1909 = const()[name = string("op_1909"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 448]> var_1910_cast_fp16 = reshape(shape = var_1909, x = value_13_cast_fp16)[name = string("op_1910_cast_fp16")];
+            bool attn_25_transpose_x_0 = const()[name = string("attn_25_transpose_x_0"), val = bool(false)];
+            bool attn_25_transpose_y_0 = const()[name = string("attn_25_transpose_y_0"), val = bool(true)];
+            tensor<fp16, [1, 12, 64, 1]> attn_25_cast_fp16 = matmul(transpose_x = attn_25_transpose_x_0, transpose_y = attn_25_transpose_y_0, x = var_1910_cast_fp16, y = var_1908_cast_fp16)[name = string("attn_25_cast_fp16")];
+            tensor<int32, [4]> var_1913 = const()[name = string("op_1913"), val = tensor<int32, [4]>([1, 768, 1, -1])];
+            tensor<fp16, [1, 768, 1, 1]> input_61_cast_fp16 = reshape(shape = var_1913, x = attn_25_cast_fp16)[name = string("input_61_cast_fp16")];
+            string var_1923_pad_type_0 = const()[name = string("op_1923_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1923_strides_0 = const()[name = string("op_1923_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1923_pad_0 = const()[name = string("op_1923_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1923_dilations_0 = const()[name = string("op_1923_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1923_groups_0 = const()[name = string("op_1923_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_6_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(113927616))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(114222592))))[name = string("layers_6_self_attn_o_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_6_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_6_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(114222720)))];
+            tensor<fp16, [1, 768, 1, 1]> var_1923_cast_fp16 = conv(bias = layers_6_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_1923_dilations_0, groups = var_1923_groups_0, pad = var_1923_pad_0, pad_type = var_1923_pad_type_0, strides = var_1923_strides_0, weight = layers_6_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_61_cast_fp16)[name = string("op_1923_cast_fp16")];
+            string var_1929_pad_type_0 = const()[name = string("op_1929_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1929_strides_0 = const()[name = string("op_1929_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1929_pad_0 = const()[name = string("op_1929_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1929_dilations_0 = const()[name = string("op_1929_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1929_groups_0 = const()[name = string("op_1929_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_6_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(114230208))), nonzero_data = tensor<fp16, [2896]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(114224320))))[name = string("layers_6_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1]> var_1929_cast_fp16 = conv(dilations = var_1929_dilations_0, groups = var_1929_groups_0, pad = var_1929_pad_0, pad_type = var_1929_pad_type_0, strides = var_1929_strides_0, weight = layers_6_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_61_cast_fp16)[name = string("op_1929_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> obj_121_cast_fp16 = add(x = var_1923_cast_fp16, y = var_1929_cast_fp16)[name = string("obj_121_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> inputs_39_cast_fp16 = add(x = inputs_37_cast_fp16, y = obj_121_cast_fp16)[name = string("inputs_39_cast_fp16")];
+            tensor<int32, [1]> out_39_axes_0 = const()[name = string("out_39_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_1944_to_fp16 = const()[name = string("op_1944_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> out_39_cast_fp16 = layer_norm(axes = out_39_axes_0, epsilon = var_1944_to_fp16, x = inputs_39_cast_fp16)[name = string("out_39_cast_fp16")];
+            tensor<fp16, [768]> obj_123_gamma_0_to_fp16 = const()[name = string("obj_123_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(114304000)))];
+            tensor<fp16, [768]> obj_123_beta_0_to_fp16 = const()[name = string("obj_123_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(114305600)))];
+            fp16 obj_123_epsilon_0_to_fp16 = const()[name = string("obj_123_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> obj_123_cast_fp16 = batch_norm(beta = obj_123_beta_0_to_fp16, epsilon = obj_123_epsilon_0_to_fp16, gamma = obj_123_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_39_cast_fp16)[name = string("obj_123_cast_fp16")];
+            string var_1964_pad_type_0 = const()[name = string("op_1964_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1964_strides_0 = const()[name = string("op_1964_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1964_pad_0 = const()[name = string("op_1964_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1964_dilations_0 = const()[name = string("op_1964_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1964_groups_0 = const()[name = string("op_1964_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_6_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(114307200))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(114602176))))[name = string("layers_6_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_6_encoder_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_6_encoder_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(114602304)))];
+            tensor<fp16, [1, 768, 1, 1]> var_1964_cast_fp16 = conv(bias = layers_6_encoder_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_1964_dilations_0, groups = var_1964_groups_0, pad = var_1964_pad_0, pad_type = var_1964_pad_type_0, strides = var_1964_strides_0, weight = layers_6_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_123_cast_fp16)[name = string("op_1964_cast_fp16")];
+            string var_1970_pad_type_0 = const()[name = string("op_1970_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1970_strides_0 = const()[name = string("op_1970_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1970_pad_0 = const()[name = string("op_1970_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1970_dilations_0 = const()[name = string("op_1970_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1970_groups_0 = const()[name = string("op_1970_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_6_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(114610752))), nonzero_data = tensor<fp16, [3367]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(114603904))))[name = string("layers_6_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1]> var_1970_cast_fp16 = conv(dilations = var_1970_dilations_0, groups = var_1970_groups_0, pad = var_1970_pad_0, pad_type = var_1970_pad_type_0, strides = var_1970_strides_0, weight = layers_6_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_123_cast_fp16)[name = string("op_1970_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> query_27_cast_fp16 = add(x = var_1964_cast_fp16, y = var_1970_cast_fp16)[name = string("query_27_cast_fp16")];
+            tensor<int32, [4]> var_1973 = const()[name = string("op_1973"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1]> mh_q_27_cast_fp16 = reshape(shape = var_1973, x = query_27_cast_fp16)[name = string("mh_q_27_cast_fp16")];
+            fp16 var_1975_to_fp16 = const()[name = string("op_1975_to_fp16"), val = fp16(0x1p-3)];
+            tensor<fp16, [1, 12, 64, 1]> var_1976_cast_fp16 = mul(x = mh_q_27_cast_fp16, y = var_1975_to_fp16)[name = string("op_1976_cast_fp16")];
+            tensor<int32, [4]> var_1977 = const()[name = string("op_1977"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1536]> var_1978_cast_fp16 = reshape(shape = var_1977, x = obj_125_cast_fp16)[name = string("op_1978_cast_fp16")];
+            bool mh_w_53_transpose_x_0 = const()[name = string("mh_w_53_transpose_x_0"), val = bool(true)];
+            bool mh_w_53_transpose_y_0 = const()[name = string("mh_w_53_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 12, 1, 1536]> mh_w_53_cast_fp16 = matmul(transpose_x = mh_w_53_transpose_x_0, transpose_y = mh_w_53_transpose_y_0, x = var_1976_cast_fp16, y = var_1978_cast_fp16)[name = string("mh_w_53_cast_fp16")];
+            tensor<fp16, [1, 12, 1, 1536]> mh_w_55_cast_fp16 = add(x = mh_w_53_cast_fp16, y = var_297_cast_fp16)[name = string("mh_w_55_cast_fp16")];
+            tensor<fp16, [1, 12, 1, 1536]> obj_131_cast_fp16 = softmax(axis = var_1800, x = mh_w_55_cast_fp16)[name = string("obj_131_cast_fp16")];
+            tensor<int32, [4]> var_1987 = const()[name = string("op_1987"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1536]> var_1988_cast_fp16 = reshape(shape = var_1987, x = obj_127_cast_fp16)[name = string("op_1988_cast_fp16")];
+            bool attn_27_transpose_x_0 = const()[name = string("attn_27_transpose_x_0"), val = bool(false)];
+            bool attn_27_transpose_y_0 = const()[name = string("attn_27_transpose_y_0"), val = bool(true)];
+            tensor<fp16, [1, 12, 64, 1]> attn_27_cast_fp16 = matmul(transpose_x = attn_27_transpose_x_0, transpose_y = attn_27_transpose_y_0, x = var_1988_cast_fp16, y = obj_131_cast_fp16)[name = string("attn_27_cast_fp16")];
+            tensor<int32, [4]> var_1991 = const()[name = string("op_1991"), val = tensor<int32, [4]>([1, 768, 1, -1])];
+            tensor<fp16, [1, 768, 1, 1]> input_63_cast_fp16 = reshape(shape = var_1991, x = attn_27_cast_fp16)[name = string("input_63_cast_fp16")];
+            string var_2001_pad_type_0 = const()[name = string("op_2001_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2001_strides_0 = const()[name = string("op_2001_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2001_pad_0 = const()[name = string("op_2001_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2001_dilations_0 = const()[name = string("op_2001_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2001_groups_0 = const()[name = string("op_2001_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_6_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(114684544))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(114979520))))[name = string("layers_6_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_6_encoder_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_6_encoder_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(114979648)))];
+            tensor<fp16, [1, 768, 1, 1]> var_2001_cast_fp16 = conv(bias = layers_6_encoder_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_2001_dilations_0, groups = var_2001_groups_0, pad = var_2001_pad_0, pad_type = var_2001_pad_type_0, strides = var_2001_strides_0, weight = layers_6_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_63_cast_fp16)[name = string("op_2001_cast_fp16")];
+            string var_2007_pad_type_0 = const()[name = string("op_2007_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2007_strides_0 = const()[name = string("op_2007_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2007_pad_0 = const()[name = string("op_2007_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2007_dilations_0 = const()[name = string("op_2007_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2007_groups_0 = const()[name = string("op_2007_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_6_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(114987584))), nonzero_data = tensor<fp16, [3105]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(114981248))))[name = string("layers_6_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1]> var_2007_cast_fp16 = conv(dilations = var_2007_dilations_0, groups = var_2007_groups_0, pad = var_2007_pad_0, pad_type = var_2007_pad_type_0, strides = var_2007_strides_0, weight = layers_6_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_63_cast_fp16)[name = string("op_2007_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> obj_129_cast_fp16 = add(x = var_2001_cast_fp16, y = var_2007_cast_fp16)[name = string("obj_129_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> inputs_41_cast_fp16 = add(x = inputs_39_cast_fp16, y = obj_129_cast_fp16)[name = string("inputs_41_cast_fp16")];
+            tensor<int32, [1]> out_41_axes_0 = const()[name = string("out_41_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_2021_to_fp16 = const()[name = string("op_2021_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> out_41_cast_fp16 = layer_norm(axes = out_41_axes_0, epsilon = var_2021_to_fp16, x = inputs_41_cast_fp16)[name = string("out_41_cast_fp16")];
+            tensor<fp16, [768]> input_65_gamma_0_to_fp16 = const()[name = string("input_65_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(115061376)))];
+            tensor<fp16, [768]> input_65_beta_0_to_fp16 = const()[name = string("input_65_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(115062976)))];
+            fp16 input_65_epsilon_0_to_fp16 = const()[name = string("input_65_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> input_65_cast_fp16 = batch_norm(beta = input_65_beta_0_to_fp16, epsilon = input_65_epsilon_0_to_fp16, gamma = input_65_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_41_cast_fp16)[name = string("input_65_cast_fp16")];
+            string var_2039_pad_type_0 = const()[name = string("op_2039_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2039_strides_0 = const()[name = string("op_2039_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2039_pad_0 = const()[name = string("op_2039_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2039_dilations_0 = const()[name = string("op_2039_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2039_groups_0 = const()[name = string("op_2039_groups_0"), val = int32(1)];
+            tensor<fp16, [3072, 768, 1, 1]> layers_6_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [3072, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(115064576))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(116244288))))[name = string("layers_6_fc1_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [3072]> layers_6_fc1_inlier_module_bias_to_fp16 = const()[name = string("layers_6_fc1_inlier_module_bias_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(116244416)))];
+            tensor<fp16, [1, 3072, 1, 1]> var_2039_cast_fp16 = conv(bias = layers_6_fc1_inlier_module_bias_to_fp16, dilations = var_2039_dilations_0, groups = var_2039_groups_0, pad = var_2039_pad_0, pad_type = var_2039_pad_type_0, strides = var_2039_strides_0, weight = layers_6_fc1_inlier_module_weight_to_fp16_palettized, x = input_65_cast_fp16)[name = string("op_2039_cast_fp16")];
+            string var_2045_pad_type_0 = const()[name = string("op_2045_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2045_strides_0 = const()[name = string("op_2045_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2045_pad_0 = const()[name = string("op_2045_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2045_dilations_0 = const()[name = string("op_2045_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2045_groups_0 = const()[name = string("op_2045_groups_0"), val = int32(1)];
+            tensor<fp16, [3072, 768, 1, 1]> layers_6_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [3072, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(116270336))), nonzero_data = tensor<fp16, [9804]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(116250624))))[name = string("layers_6_fc1_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 3072, 1, 1]> var_2045_cast_fp16 = conv(dilations = var_2045_dilations_0, groups = var_2045_groups_0, pad = var_2045_pad_0, pad_type = var_2045_pad_type_0, strides = var_2045_strides_0, weight = layers_6_fc1_outlier_module_weight_to_fp16_sparsified, x = input_65_cast_fp16)[name = string("op_2045_cast_fp16")];
+            tensor<fp16, [1, 3072, 1, 1]> input_67_cast_fp16 = add(x = var_2039_cast_fp16, y = var_2045_cast_fp16)[name = string("input_67_cast_fp16")];
+            string input_69_mode_0 = const()[name = string("input_69_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 3072, 1, 1]> input_69_cast_fp16 = gelu(mode = input_69_mode_0, x = input_67_cast_fp16)[name = string("input_69_cast_fp16")];
+            string var_2056_pad_type_0 = const()[name = string("op_2056_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2056_strides_0 = const()[name = string("op_2056_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2056_pad_0 = const()[name = string("op_2056_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2056_dilations_0 = const()[name = string("op_2056_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2056_groups_0 = const()[name = string("op_2056_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 3072, 1, 1]> layers_6_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(116565312))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(117745024))))[name = string("layers_6_fc2_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_6_fc2_inlier_module_bias_to_fp16 = const()[name = string("layers_6_fc2_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(117745152)))];
+            tensor<fp16, [1, 768, 1, 1]> var_2056_cast_fp16 = conv(bias = layers_6_fc2_inlier_module_bias_to_fp16, dilations = var_2056_dilations_0, groups = var_2056_groups_0, pad = var_2056_pad_0, pad_type = var_2056_pad_type_0, strides = var_2056_strides_0, weight = layers_6_fc2_inlier_module_weight_to_fp16_palettized, x = input_69_cast_fp16)[name = string("op_2056_cast_fp16")];
+            string var_2062_pad_type_0 = const()[name = string("op_2062_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2062_strides_0 = const()[name = string("op_2062_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2062_pad_0 = const()[name = string("op_2062_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2062_dilations_0 = const()[name = string("op_2062_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2062_groups_0 = const()[name = string("op_2062_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 3072, 1, 1]> layers_6_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(117773312))), nonzero_data = tensor<fp16, [13229]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(117746752))))[name = string("layers_6_fc2_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1]> var_2062_cast_fp16 = conv(dilations = var_2062_dilations_0, groups = var_2062_groups_0, pad = var_2062_pad_0, pad_type = var_2062_pad_type_0, strides = var_2062_strides_0, weight = layers_6_fc2_outlier_module_weight_to_fp16_sparsified, x = input_69_cast_fp16)[name = string("op_2062_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> hidden_states_15_cast_fp16 = add(x = var_2056_cast_fp16, y = var_2062_cast_fp16)[name = string("hidden_states_15_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> inputs_43_cast_fp16 = add(x = inputs_41_cast_fp16, y = hidden_states_15_cast_fp16)[name = string("inputs_43_cast_fp16")];
+            tensor<int32, [4]> obj_143_begin_0 = const()[name = string("obj_143_begin_0"), val = tensor<int32, [4]>([7, 0, 0, 0])];
+            tensor<int32, [4]> obj_143_end_0 = const()[name = string("obj_143_end_0"), val = tensor<int32, [4]>([8, 768, 1, 1536])];
+            tensor<bool, [4]> obj_143_end_mask_0 = const()[name = string("obj_143_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<fp16, [1, 768, 1, 1536]> obj_143_cast_fp16 = slice_by_index(begin = obj_143_begin_0, end = obj_143_end_0, end_mask = obj_143_end_mask_0, x = read_state_2)[name = string("obj_143_cast_fp16")];
+            tensor<int32, [4]> obj_145_begin_0 = const()[name = string("obj_145_begin_0"), val = tensor<int32, [4]>([7, 0, 0, 0])];
+            tensor<int32, [4]> obj_145_end_0 = const()[name = string("obj_145_end_0"), val = tensor<int32, [4]>([8, 768, 1, 1536])];
+            tensor<bool, [4]> obj_145_end_mask_0 = const()[name = string("obj_145_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<fp16, [1, 768, 1, 1536]> obj_145_cast_fp16 = slice_by_index(begin = obj_145_begin_0, end = obj_145_end_0, end_mask = obj_145_end_mask_0, x = read_state_3)[name = string("obj_145_cast_fp16")];
+            int32 var_2085 = const()[name = string("op_2085"), val = int32(3)];
+            tensor<int32, [1]> out_43_axes_0 = const()[name = string("out_43_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_2110_to_fp16 = const()[name = string("op_2110_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> out_43_cast_fp16 = layer_norm(axes = out_43_axes_0, epsilon = var_2110_to_fp16, x = inputs_43_cast_fp16)[name = string("out_43_cast_fp16")];
+            tensor<fp16, [768]> obj_133_gamma_0_to_fp16 = const()[name = string("obj_133_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(118068288)))];
+            tensor<fp16, [768]> obj_133_beta_0_to_fp16 = const()[name = string("obj_133_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(118069888)))];
+            fp16 obj_133_epsilon_0_to_fp16 = const()[name = string("obj_133_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> obj_133_cast_fp16 = batch_norm(beta = obj_133_beta_0_to_fp16, epsilon = obj_133_epsilon_0_to_fp16, gamma = obj_133_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_43_cast_fp16)[name = string("obj_133_cast_fp16")];
+            string var_2132_pad_type_0 = const()[name = string("op_2132_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2132_strides_0 = const()[name = string("op_2132_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2132_pad_0 = const()[name = string("op_2132_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2132_dilations_0 = const()[name = string("op_2132_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2132_groups_0 = const()[name = string("op_2132_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_7_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(118071488))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(118366464))))[name = string("layers_7_self_attn_q_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_7_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_7_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(118366592)))];
+            tensor<fp16, [1, 768, 1, 1]> var_2132_cast_fp16 = conv(bias = layers_7_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_2132_dilations_0, groups = var_2132_groups_0, pad = var_2132_pad_0, pad_type = var_2132_pad_type_0, strides = var_2132_strides_0, weight = layers_7_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_133_cast_fp16)[name = string("op_2132_cast_fp16")];
+            string var_2138_pad_type_0 = const()[name = string("op_2138_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2138_strides_0 = const()[name = string("op_2138_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2138_pad_0 = const()[name = string("op_2138_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2138_dilations_0 = const()[name = string("op_2138_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2138_groups_0 = const()[name = string("op_2138_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_7_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(118374336))), nonzero_data = tensor<fp16, [3010]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(118368192))))[name = string("layers_7_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1]> var_2138_cast_fp16 = conv(dilations = var_2138_dilations_0, groups = var_2138_groups_0, pad = var_2138_pad_0, pad_type = var_2138_pad_type_0, strides = var_2138_strides_0, weight = layers_7_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_133_cast_fp16)[name = string("op_2138_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> query_29_cast_fp16 = add(x = var_2132_cast_fp16, y = var_2138_cast_fp16)[name = string("query_29_cast_fp16")];
+            string var_2147_pad_type_0 = const()[name = string("op_2147_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2147_strides_0 = const()[name = string("op_2147_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2147_pad_0 = const()[name = string("op_2147_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2147_dilations_0 = const()[name = string("op_2147_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2147_groups_0 = const()[name = string("op_2147_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_7_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(118448128))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(118743104))))[name = string("layers_7_self_attn_k_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1, 768, 1, 1]> var_2147_cast_fp16 = conv(dilations = var_2147_dilations_0, groups = var_2147_groups_0, pad = var_2147_pad_0, pad_type = var_2147_pad_type_0, strides = var_2147_strides_0, weight = layers_7_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_133_cast_fp16)[name = string("op_2147_cast_fp16")];
+            string var_2153_pad_type_0 = const()[name = string("op_2153_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2153_strides_0 = const()[name = string("op_2153_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2153_pad_0 = const()[name = string("op_2153_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2153_dilations_0 = const()[name = string("op_2153_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2153_groups_0 = const()[name = string("op_2153_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_7_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(118749696))), nonzero_data = tensor<fp16, [3175]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(118743232))))[name = string("layers_7_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1]> var_2153_cast_fp16 = conv(dilations = var_2153_dilations_0, groups = var_2153_groups_0, pad = var_2153_pad_0, pad_type = var_2153_pad_type_0, strides = var_2153_strides_0, weight = layers_7_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_133_cast_fp16)[name = string("op_2153_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> current_key_15_cast_fp16 = add(x = var_2147_cast_fp16, y = var_2153_cast_fp16)[name = string("current_key_15_cast_fp16")];
+            string var_2163_pad_type_0 = const()[name = string("op_2163_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2163_strides_0 = const()[name = string("op_2163_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2163_pad_0 = const()[name = string("op_2163_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2163_dilations_0 = const()[name = string("op_2163_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2163_groups_0 = const()[name = string("op_2163_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_7_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(118823488))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(119118464))))[name = string("layers_7_self_attn_v_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_7_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_7_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(119118592)))];
+            tensor<fp16, [1, 768, 1, 1]> var_2163_cast_fp16 = conv(bias = layers_7_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_2163_dilations_0, groups = var_2163_groups_0, pad = var_2163_pad_0, pad_type = var_2163_pad_type_0, strides = var_2163_strides_0, weight = layers_7_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_133_cast_fp16)[name = string("op_2163_cast_fp16")];
+            string var_2169_pad_type_0 = const()[name = string("op_2169_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2169_strides_0 = const()[name = string("op_2169_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2169_pad_0 = const()[name = string("op_2169_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2169_dilations_0 = const()[name = string("op_2169_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2169_groups_0 = const()[name = string("op_2169_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_7_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(119125184))), nonzero_data = tensor<fp16, [2435]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(119120192))))[name = string("layers_7_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1]> var_2169_cast_fp16 = conv(dilations = var_2169_dilations_0, groups = var_2169_groups_0, pad = var_2169_pad_0, pad_type = var_2169_pad_type_0, strides = var_2169_strides_0, weight = layers_7_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_133_cast_fp16)[name = string("op_2169_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> current_value_15_cast_fp16 = add(x = var_2163_cast_fp16, y = var_2169_cast_fp16)[name = string("current_value_15_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 448]> var_2175_cast_fp16 = mul(x = current_key_15_cast_fp16, y = var_202_cast_fp16)[name = string("op_2175_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 448]> key_15_cast_fp16 = add(x = var_71_cast_fp16_7, y = var_2175_cast_fp16)[name = string("key_15_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 448]> var_2177_cast_fp16 = mul(x = current_value_15_cast_fp16, y = var_202_cast_fp16)[name = string("op_2177_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 448]> value_15_cast_fp16 = add(x = var_86_cast_fp16_7, y = var_2177_cast_fp16)[name = string("value_15_cast_fp16")];
+            tensor<int32, [4]> var_2180 = const()[name = string("op_2180"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1]> mh_q_29_cast_fp16 = reshape(shape = var_2180, x = query_29_cast_fp16)[name = string("mh_q_29_cast_fp16")];
+            fp16 var_2182_to_fp16 = const()[name = string("op_2182_to_fp16"), val = fp16(0x1p-3)];
+            tensor<fp16, [1, 12, 64, 1]> var_2183_cast_fp16 = mul(x = mh_q_29_cast_fp16, y = var_2182_to_fp16)[name = string("op_2183_cast_fp16")];
+            tensor<int32, [4]> var_2184 = const()[name = string("op_2184"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 448]> var_2185_cast_fp16 = reshape(shape = var_2184, x = key_15_cast_fp16)[name = string("op_2185_cast_fp16")];
+            bool mh_w_57_transpose_x_0 = const()[name = string("mh_w_57_transpose_x_0"), val = bool(true)];
+            bool mh_w_57_transpose_y_0 = const()[name = string("mh_w_57_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 12, 1, 448]> mh_w_57_cast_fp16 = matmul(transpose_x = mh_w_57_transpose_x_0, transpose_y = mh_w_57_transpose_y_0, x = var_2183_cast_fp16, y = var_2185_cast_fp16)[name = string("mh_w_57_cast_fp16")];
+            tensor<fp16, [1, 12, 1, 448]> mh_w_59_cast_fp16 = add(x = mh_w_57_cast_fp16, y = var_219_cast_fp16)[name = string("mh_w_59_cast_fp16")];
+            tensor<fp16, [1, 12, 1, 448]> var_2193_cast_fp16 = softmax(axis = var_2085, x = mh_w_59_cast_fp16)[name = string("op_2193_cast_fp16")];
+            tensor<int32, [4]> var_2194 = const()[name = string("op_2194"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 448]> var_2195_cast_fp16 = reshape(shape = var_2194, x = value_15_cast_fp16)[name = string("op_2195_cast_fp16")];
+            bool attn_29_transpose_x_0 = const()[name = string("attn_29_transpose_x_0"), val = bool(false)];
+            bool attn_29_transpose_y_0 = const()[name = string("attn_29_transpose_y_0"), val = bool(true)];
+            tensor<fp16, [1, 12, 64, 1]> attn_29_cast_fp16 = matmul(transpose_x = attn_29_transpose_x_0, transpose_y = attn_29_transpose_y_0, x = var_2195_cast_fp16, y = var_2193_cast_fp16)[name = string("attn_29_cast_fp16")];
+            tensor<int32, [4]> var_2198 = const()[name = string("op_2198"), val = tensor<int32, [4]>([1, 768, 1, -1])];
+            tensor<fp16, [1, 768, 1, 1]> input_71_cast_fp16 = reshape(shape = var_2198, x = attn_29_cast_fp16)[name = string("input_71_cast_fp16")];
+            string var_2208_pad_type_0 = const()[name = string("op_2208_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2208_strides_0 = const()[name = string("op_2208_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2208_pad_0 = const()[name = string("op_2208_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2208_dilations_0 = const()[name = string("op_2208_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2208_groups_0 = const()[name = string("op_2208_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_7_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(119198976))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(119493952))))[name = string("layers_7_self_attn_o_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_7_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_7_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(119494080)))];
+            tensor<fp16, [1, 768, 1, 1]> var_2208_cast_fp16 = conv(bias = layers_7_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_2208_dilations_0, groups = var_2208_groups_0, pad = var_2208_pad_0, pad_type = var_2208_pad_type_0, strides = var_2208_strides_0, weight = layers_7_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_71_cast_fp16)[name = string("op_2208_cast_fp16")];
+            string var_2214_pad_type_0 = const()[name = string("op_2214_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2214_strides_0 = const()[name = string("op_2214_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2214_pad_0 = const()[name = string("op_2214_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2214_dilations_0 = const()[name = string("op_2214_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2214_groups_0 = const()[name = string("op_2214_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_7_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(119501120))), nonzero_data = tensor<fp16, [2666]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(119495680))))[name = string("layers_7_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1]> var_2214_cast_fp16 = conv(dilations = var_2214_dilations_0, groups = var_2214_groups_0, pad = var_2214_pad_0, pad_type = var_2214_pad_type_0, strides = var_2214_strides_0, weight = layers_7_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_71_cast_fp16)[name = string("op_2214_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> obj_139_cast_fp16 = add(x = var_2208_cast_fp16, y = var_2214_cast_fp16)[name = string("obj_139_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> inputs_45_cast_fp16 = add(x = inputs_43_cast_fp16, y = obj_139_cast_fp16)[name = string("inputs_45_cast_fp16")];
+            tensor<int32, [1]> out_45_axes_0 = const()[name = string("out_45_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_2229_to_fp16 = const()[name = string("op_2229_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> out_45_cast_fp16 = layer_norm(axes = out_45_axes_0, epsilon = var_2229_to_fp16, x = inputs_45_cast_fp16)[name = string("out_45_cast_fp16")];
+            tensor<fp16, [768]> obj_141_gamma_0_to_fp16 = const()[name = string("obj_141_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(119574912)))];
+            tensor<fp16, [768]> obj_141_beta_0_to_fp16 = const()[name = string("obj_141_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(119576512)))];
+            fp16 obj_141_epsilon_0_to_fp16 = const()[name = string("obj_141_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> obj_141_cast_fp16 = batch_norm(beta = obj_141_beta_0_to_fp16, epsilon = obj_141_epsilon_0_to_fp16, gamma = obj_141_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_45_cast_fp16)[name = string("obj_141_cast_fp16")];
+            string var_2249_pad_type_0 = const()[name = string("op_2249_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2249_strides_0 = const()[name = string("op_2249_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2249_pad_0 = const()[name = string("op_2249_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2249_dilations_0 = const()[name = string("op_2249_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2249_groups_0 = const()[name = string("op_2249_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_7_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(119578112))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(119873088))))[name = string("layers_7_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_7_encoder_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_7_encoder_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(119873216)))];
+            tensor<fp16, [1, 768, 1, 1]> var_2249_cast_fp16 = conv(bias = layers_7_encoder_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_2249_dilations_0, groups = var_2249_groups_0, pad = var_2249_pad_0, pad_type = var_2249_pad_type_0, strides = var_2249_strides_0, weight = layers_7_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_141_cast_fp16)[name = string("op_2249_cast_fp16")];
+            string var_2255_pad_type_0 = const()[name = string("op_2255_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2255_strides_0 = const()[name = string("op_2255_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2255_pad_0 = const()[name = string("op_2255_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2255_dilations_0 = const()[name = string("op_2255_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2255_groups_0 = const()[name = string("op_2255_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_7_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(119881664))), nonzero_data = tensor<fp16, [3365]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(119874816))))[name = string("layers_7_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1]> var_2255_cast_fp16 = conv(dilations = var_2255_dilations_0, groups = var_2255_groups_0, pad = var_2255_pad_0, pad_type = var_2255_pad_type_0, strides = var_2255_strides_0, weight = layers_7_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_141_cast_fp16)[name = string("op_2255_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> query_31_cast_fp16 = add(x = var_2249_cast_fp16, y = var_2255_cast_fp16)[name = string("query_31_cast_fp16")];
+            tensor<int32, [4]> var_2258 = const()[name = string("op_2258"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1]> mh_q_31_cast_fp16 = reshape(shape = var_2258, x = query_31_cast_fp16)[name = string("mh_q_31_cast_fp16")];
+            fp16 var_2260_to_fp16 = const()[name = string("op_2260_to_fp16"), val = fp16(0x1p-3)];
+            tensor<fp16, [1, 12, 64, 1]> var_2261_cast_fp16 = mul(x = mh_q_31_cast_fp16, y = var_2260_to_fp16)[name = string("op_2261_cast_fp16")];
+            tensor<int32, [4]> var_2262 = const()[name = string("op_2262"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1536]> var_2263_cast_fp16 = reshape(shape = var_2262, x = obj_143_cast_fp16)[name = string("op_2263_cast_fp16")];
+            bool mh_w_61_transpose_x_0 = const()[name = string("mh_w_61_transpose_x_0"), val = bool(true)];
+            bool mh_w_61_transpose_y_0 = const()[name = string("mh_w_61_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 12, 1, 1536]> mh_w_61_cast_fp16 = matmul(transpose_x = mh_w_61_transpose_x_0, transpose_y = mh_w_61_transpose_y_0, x = var_2261_cast_fp16, y = var_2263_cast_fp16)[name = string("mh_w_61_cast_fp16")];
+            tensor<fp16, [1, 12, 1, 1536]> mh_w_63_cast_fp16 = add(x = mh_w_61_cast_fp16, y = var_297_cast_fp16)[name = string("mh_w_63_cast_fp16")];
+            tensor<fp16, [1, 12, 1, 1536]> obj_149_cast_fp16 = softmax(axis = var_2085, x = mh_w_63_cast_fp16)[name = string("obj_149_cast_fp16")];
+            tensor<int32, [4]> var_2272 = const()[name = string("op_2272"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1536]> var_2273_cast_fp16 = reshape(shape = var_2272, x = obj_145_cast_fp16)[name = string("op_2273_cast_fp16")];
+            bool attn_31_transpose_x_0 = const()[name = string("attn_31_transpose_x_0"), val = bool(false)];
+            bool attn_31_transpose_y_0 = const()[name = string("attn_31_transpose_y_0"), val = bool(true)];
+            tensor<fp16, [1, 12, 64, 1]> attn_31_cast_fp16 = matmul(transpose_x = attn_31_transpose_x_0, transpose_y = attn_31_transpose_y_0, x = var_2273_cast_fp16, y = obj_149_cast_fp16)[name = string("attn_31_cast_fp16")];
+            tensor<int32, [4]> var_2276 = const()[name = string("op_2276"), val = tensor<int32, [4]>([1, 768, 1, -1])];
+            tensor<fp16, [1, 768, 1, 1]> input_73_cast_fp16 = reshape(shape = var_2276, x = attn_31_cast_fp16)[name = string("input_73_cast_fp16")];
+            string var_2286_pad_type_0 = const()[name = string("op_2286_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2286_strides_0 = const()[name = string("op_2286_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2286_pad_0 = const()[name = string("op_2286_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2286_dilations_0 = const()[name = string("op_2286_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2286_groups_0 = const()[name = string("op_2286_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_7_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(119955456))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(120250432))))[name = string("layers_7_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_7_encoder_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_7_encoder_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(120250560)))];
+            tensor<fp16, [1, 768, 1, 1]> var_2286_cast_fp16 = conv(bias = layers_7_encoder_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_2286_dilations_0, groups = var_2286_groups_0, pad = var_2286_pad_0, pad_type = var_2286_pad_type_0, strides = var_2286_strides_0, weight = layers_7_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_73_cast_fp16)[name = string("op_2286_cast_fp16")];
+            string var_2292_pad_type_0 = const()[name = string("op_2292_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2292_strides_0 = const()[name = string("op_2292_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2292_pad_0 = const()[name = string("op_2292_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2292_dilations_0 = const()[name = string("op_2292_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2292_groups_0 = const()[name = string("op_2292_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_7_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(120257920))), nonzero_data = tensor<fp16, [2824]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(120252160))))[name = string("layers_7_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1]> var_2292_cast_fp16 = conv(dilations = var_2292_dilations_0, groups = var_2292_groups_0, pad = var_2292_pad_0, pad_type = var_2292_pad_type_0, strides = var_2292_strides_0, weight = layers_7_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_73_cast_fp16)[name = string("op_2292_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> obj_147_cast_fp16 = add(x = var_2286_cast_fp16, y = var_2292_cast_fp16)[name = string("obj_147_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> inputs_47_cast_fp16 = add(x = inputs_45_cast_fp16, y = obj_147_cast_fp16)[name = string("inputs_47_cast_fp16")];
+            tensor<int32, [1]> out_47_axes_0 = const()[name = string("out_47_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_2306_to_fp16 = const()[name = string("op_2306_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> out_47_cast_fp16 = layer_norm(axes = out_47_axes_0, epsilon = var_2306_to_fp16, x = inputs_47_cast_fp16)[name = string("out_47_cast_fp16")];
+            tensor<fp16, [768]> input_75_gamma_0_to_fp16 = const()[name = string("input_75_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(120331712)))];
+            tensor<fp16, [768]> input_75_beta_0_to_fp16 = const()[name = string("input_75_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(120333312)))];
+            fp16 input_75_epsilon_0_to_fp16 = const()[name = string("input_75_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> input_75_cast_fp16 = batch_norm(beta = input_75_beta_0_to_fp16, epsilon = input_75_epsilon_0_to_fp16, gamma = input_75_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_47_cast_fp16)[name = string("input_75_cast_fp16")];
+            string var_2324_pad_type_0 = const()[name = string("op_2324_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2324_strides_0 = const()[name = string("op_2324_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2324_pad_0 = const()[name = string("op_2324_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2324_dilations_0 = const()[name = string("op_2324_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2324_groups_0 = const()[name = string("op_2324_groups_0"), val = int32(1)];
+            tensor<fp16, [3072, 768, 1, 1]> layers_7_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [3072, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(120334912))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(121514624))))[name = string("layers_7_fc1_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [3072]> layers_7_fc1_inlier_module_bias_to_fp16 = const()[name = string("layers_7_fc1_inlier_module_bias_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(121514752)))];
+            tensor<fp16, [1, 3072, 1, 1]> var_2324_cast_fp16 = conv(bias = layers_7_fc1_inlier_module_bias_to_fp16, dilations = var_2324_dilations_0, groups = var_2324_groups_0, pad = var_2324_pad_0, pad_type = var_2324_pad_type_0, strides = var_2324_strides_0, weight = layers_7_fc1_inlier_module_weight_to_fp16_palettized, x = input_75_cast_fp16)[name = string("op_2324_cast_fp16")];
+            string var_2330_pad_type_0 = const()[name = string("op_2330_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2330_strides_0 = const()[name = string("op_2330_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2330_pad_0 = const()[name = string("op_2330_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2330_dilations_0 = const()[name = string("op_2330_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2330_groups_0 = const()[name = string("op_2330_groups_0"), val = int32(1)];
+            tensor<fp16, [3072, 768, 1, 1]> layers_7_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [3072, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(121540160))), nonzero_data = tensor<fp16, [9551]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(121520960))))[name = string("layers_7_fc1_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 3072, 1, 1]> var_2330_cast_fp16 = conv(dilations = var_2330_dilations_0, groups = var_2330_groups_0, pad = var_2330_pad_0, pad_type = var_2330_pad_type_0, strides = var_2330_strides_0, weight = layers_7_fc1_outlier_module_weight_to_fp16_sparsified, x = input_75_cast_fp16)[name = string("op_2330_cast_fp16")];
+            tensor<fp16, [1, 3072, 1, 1]> input_77_cast_fp16 = add(x = var_2324_cast_fp16, y = var_2330_cast_fp16)[name = string("input_77_cast_fp16")];
+            string input_79_mode_0 = const()[name = string("input_79_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 3072, 1, 1]> input_79_cast_fp16 = gelu(mode = input_79_mode_0, x = input_77_cast_fp16)[name = string("input_79_cast_fp16")];
+            string var_2341_pad_type_0 = const()[name = string("op_2341_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2341_strides_0 = const()[name = string("op_2341_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2341_pad_0 = const()[name = string("op_2341_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2341_dilations_0 = const()[name = string("op_2341_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2341_groups_0 = const()[name = string("op_2341_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 3072, 1, 1]> layers_7_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(121835136))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(123014848))))[name = string("layers_7_fc2_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_7_fc2_inlier_module_bias_to_fp16 = const()[name = string("layers_7_fc2_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(123014976)))];
+            tensor<fp16, [1, 768, 1, 1]> var_2341_cast_fp16 = conv(bias = layers_7_fc2_inlier_module_bias_to_fp16, dilations = var_2341_dilations_0, groups = var_2341_groups_0, pad = var_2341_pad_0, pad_type = var_2341_pad_type_0, strides = var_2341_strides_0, weight = layers_7_fc2_inlier_module_weight_to_fp16_palettized, x = input_79_cast_fp16)[name = string("op_2341_cast_fp16")];
+            string var_2347_pad_type_0 = const()[name = string("op_2347_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2347_strides_0 = const()[name = string("op_2347_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2347_pad_0 = const()[name = string("op_2347_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2347_dilations_0 = const()[name = string("op_2347_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2347_groups_0 = const()[name = string("op_2347_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 3072, 1, 1]> layers_7_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(123040960))), nonzero_data = tensor<fp16, [12143]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(123016576))))[name = string("layers_7_fc2_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1]> var_2347_cast_fp16 = conv(dilations = var_2347_dilations_0, groups = var_2347_groups_0, pad = var_2347_pad_0, pad_type = var_2347_pad_type_0, strides = var_2347_strides_0, weight = layers_7_fc2_outlier_module_weight_to_fp16_sparsified, x = input_79_cast_fp16)[name = string("op_2347_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> hidden_states_17_cast_fp16 = add(x = var_2341_cast_fp16, y = var_2347_cast_fp16)[name = string("hidden_states_17_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> inputs_49_cast_fp16 = add(x = inputs_47_cast_fp16, y = hidden_states_17_cast_fp16)[name = string("inputs_49_cast_fp16")];
+            tensor<int32, [4]> obj_161_begin_0 = const()[name = string("obj_161_begin_0"), val = tensor<int32, [4]>([8, 0, 0, 0])];
+            tensor<int32, [4]> obj_161_end_0 = const()[name = string("obj_161_end_0"), val = tensor<int32, [4]>([9, 768, 1, 1536])];
+            tensor<bool, [4]> obj_161_end_mask_0 = const()[name = string("obj_161_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<fp16, [1, 768, 1, 1536]> obj_161_cast_fp16 = slice_by_index(begin = obj_161_begin_0, end = obj_161_end_0, end_mask = obj_161_end_mask_0, x = read_state_2)[name = string("obj_161_cast_fp16")];
+            tensor<int32, [4]> obj_163_begin_0 = const()[name = string("obj_163_begin_0"), val = tensor<int32, [4]>([8, 0, 0, 0])];
+            tensor<int32, [4]> obj_163_end_0 = const()[name = string("obj_163_end_0"), val = tensor<int32, [4]>([9, 768, 1, 1536])];
+            tensor<bool, [4]> obj_163_end_mask_0 = const()[name = string("obj_163_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<fp16, [1, 768, 1, 1536]> obj_163_cast_fp16 = slice_by_index(begin = obj_163_begin_0, end = obj_163_end_0, end_mask = obj_163_end_mask_0, x = read_state_3)[name = string("obj_163_cast_fp16")];
+            int32 var_2370 = const()[name = string("op_2370"), val = int32(3)];
+            tensor<int32, [1]> out_49_axes_0 = const()[name = string("out_49_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_2395_to_fp16 = const()[name = string("op_2395_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> out_49_cast_fp16 = layer_norm(axes = out_49_axes_0, epsilon = var_2395_to_fp16, x = inputs_49_cast_fp16)[name = string("out_49_cast_fp16")];
+            tensor<fp16, [768]> obj_151_gamma_0_to_fp16 = const()[name = string("obj_151_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(123335936)))];
+            tensor<fp16, [768]> obj_151_beta_0_to_fp16 = const()[name = string("obj_151_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(123337536)))];
+            fp16 obj_151_epsilon_0_to_fp16 = const()[name = string("obj_151_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> obj_151_cast_fp16 = batch_norm(beta = obj_151_beta_0_to_fp16, epsilon = obj_151_epsilon_0_to_fp16, gamma = obj_151_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_49_cast_fp16)[name = string("obj_151_cast_fp16")];
+            string var_2417_pad_type_0 = const()[name = string("op_2417_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2417_strides_0 = const()[name = string("op_2417_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2417_pad_0 = const()[name = string("op_2417_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2417_dilations_0 = const()[name = string("op_2417_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2417_groups_0 = const()[name = string("op_2417_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_8_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(123339136))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(123634112))))[name = string("layers_8_self_attn_q_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_8_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_8_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(123634240)))];
+            tensor<fp16, [1, 768, 1, 1]> var_2417_cast_fp16 = conv(bias = layers_8_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_2417_dilations_0, groups = var_2417_groups_0, pad = var_2417_pad_0, pad_type = var_2417_pad_type_0, strides = var_2417_strides_0, weight = layers_8_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_151_cast_fp16)[name = string("op_2417_cast_fp16")];
+            string var_2423_pad_type_0 = const()[name = string("op_2423_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2423_strides_0 = const()[name = string("op_2423_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2423_pad_0 = const()[name = string("op_2423_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2423_dilations_0 = const()[name = string("op_2423_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2423_groups_0 = const()[name = string("op_2423_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_8_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(123641792))), nonzero_data = tensor<fp16, [2931]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(123635840))))[name = string("layers_8_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1]> var_2423_cast_fp16 = conv(dilations = var_2423_dilations_0, groups = var_2423_groups_0, pad = var_2423_pad_0, pad_type = var_2423_pad_type_0, strides = var_2423_strides_0, weight = layers_8_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_151_cast_fp16)[name = string("op_2423_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> query_33_cast_fp16 = add(x = var_2417_cast_fp16, y = var_2423_cast_fp16)[name = string("query_33_cast_fp16")];
+            string var_2432_pad_type_0 = const()[name = string("op_2432_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2432_strides_0 = const()[name = string("op_2432_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2432_pad_0 = const()[name = string("op_2432_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2432_dilations_0 = const()[name = string("op_2432_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2432_groups_0 = const()[name = string("op_2432_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_8_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(123715584))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(124010560))))[name = string("layers_8_self_attn_k_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1, 768, 1, 1]> var_2432_cast_fp16 = conv(dilations = var_2432_dilations_0, groups = var_2432_groups_0, pad = var_2432_pad_0, pad_type = var_2432_pad_type_0, strides = var_2432_strides_0, weight = layers_8_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_151_cast_fp16)[name = string("op_2432_cast_fp16")];
+            string var_2438_pad_type_0 = const()[name = string("op_2438_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2438_strides_0 = const()[name = string("op_2438_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2438_pad_0 = const()[name = string("op_2438_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2438_dilations_0 = const()[name = string("op_2438_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2438_groups_0 = const()[name = string("op_2438_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_8_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(124017600))), nonzero_data = tensor<fp16, [3403]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(124010688))))[name = string("layers_8_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1]> var_2438_cast_fp16 = conv(dilations = var_2438_dilations_0, groups = var_2438_groups_0, pad = var_2438_pad_0, pad_type = var_2438_pad_type_0, strides = var_2438_strides_0, weight = layers_8_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_151_cast_fp16)[name = string("op_2438_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> current_key_17_cast_fp16 = add(x = var_2432_cast_fp16, y = var_2438_cast_fp16)[name = string("current_key_17_cast_fp16")];
+            string var_2448_pad_type_0 = const()[name = string("op_2448_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2448_strides_0 = const()[name = string("op_2448_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2448_pad_0 = const()[name = string("op_2448_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2448_dilations_0 = const()[name = string("op_2448_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2448_groups_0 = const()[name = string("op_2448_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_8_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(124091392))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(124386368))))[name = string("layers_8_self_attn_v_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_8_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_8_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(124386496)))];
+            tensor<fp16, [1, 768, 1, 1]> var_2448_cast_fp16 = conv(bias = layers_8_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_2448_dilations_0, groups = var_2448_groups_0, pad = var_2448_pad_0, pad_type = var_2448_pad_type_0, strides = var_2448_strides_0, weight = layers_8_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_151_cast_fp16)[name = string("op_2448_cast_fp16")];
+            string var_2454_pad_type_0 = const()[name = string("op_2454_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2454_strides_0 = const()[name = string("op_2454_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2454_pad_0 = const()[name = string("op_2454_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2454_dilations_0 = const()[name = string("op_2454_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2454_groups_0 = const()[name = string("op_2454_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_8_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(124394432))), nonzero_data = tensor<fp16, [3111]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(124388096))))[name = string("layers_8_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1]> var_2454_cast_fp16 = conv(dilations = var_2454_dilations_0, groups = var_2454_groups_0, pad = var_2454_pad_0, pad_type = var_2454_pad_type_0, strides = var_2454_strides_0, weight = layers_8_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_151_cast_fp16)[name = string("op_2454_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> current_value_17_cast_fp16 = add(x = var_2448_cast_fp16, y = var_2454_cast_fp16)[name = string("current_value_17_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 448]> var_2460_cast_fp16 = mul(x = current_key_17_cast_fp16, y = var_202_cast_fp16)[name = string("op_2460_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 448]> key_17_cast_fp16 = add(x = var_71_cast_fp16_8, y = var_2460_cast_fp16)[name = string("key_17_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 448]> var_2462_cast_fp16 = mul(x = current_value_17_cast_fp16, y = var_202_cast_fp16)[name = string("op_2462_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 448]> value_17_cast_fp16 = add(x = var_86_cast_fp16_8, y = var_2462_cast_fp16)[name = string("value_17_cast_fp16")];
+            tensor<int32, [4]> var_2465 = const()[name = string("op_2465"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1]> mh_q_33_cast_fp16 = reshape(shape = var_2465, x = query_33_cast_fp16)[name = string("mh_q_33_cast_fp16")];
+            fp16 var_2467_to_fp16 = const()[name = string("op_2467_to_fp16"), val = fp16(0x1p-3)];
+            tensor<fp16, [1, 12, 64, 1]> var_2468_cast_fp16 = mul(x = mh_q_33_cast_fp16, y = var_2467_to_fp16)[name = string("op_2468_cast_fp16")];
+            tensor<int32, [4]> var_2469 = const()[name = string("op_2469"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 448]> var_2470_cast_fp16 = reshape(shape = var_2469, x = key_17_cast_fp16)[name = string("op_2470_cast_fp16")];
+            bool mh_w_65_transpose_x_0 = const()[name = string("mh_w_65_transpose_x_0"), val = bool(true)];
+            bool mh_w_65_transpose_y_0 = const()[name = string("mh_w_65_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 12, 1, 448]> mh_w_65_cast_fp16 = matmul(transpose_x = mh_w_65_transpose_x_0, transpose_y = mh_w_65_transpose_y_0, x = var_2468_cast_fp16, y = var_2470_cast_fp16)[name = string("mh_w_65_cast_fp16")];
+            tensor<fp16, [1, 12, 1, 448]> mh_w_67_cast_fp16 = add(x = mh_w_65_cast_fp16, y = var_219_cast_fp16)[name = string("mh_w_67_cast_fp16")];
+            tensor<fp16, [1, 12, 1, 448]> var_2478_cast_fp16 = softmax(axis = var_2370, x = mh_w_67_cast_fp16)[name = string("op_2478_cast_fp16")];
+            tensor<int32, [4]> var_2479 = const()[name = string("op_2479"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 448]> var_2480_cast_fp16 = reshape(shape = var_2479, x = value_17_cast_fp16)[name = string("op_2480_cast_fp16")];
+            bool attn_33_transpose_x_0 = const()[name = string("attn_33_transpose_x_0"), val = bool(false)];
+            bool attn_33_transpose_y_0 = const()[name = string("attn_33_transpose_y_0"), val = bool(true)];
+            tensor<fp16, [1, 12, 64, 1]> attn_33_cast_fp16 = matmul(transpose_x = attn_33_transpose_x_0, transpose_y = attn_33_transpose_y_0, x = var_2480_cast_fp16, y = var_2478_cast_fp16)[name = string("attn_33_cast_fp16")];
+            tensor<int32, [4]> var_2483 = const()[name = string("op_2483"), val = tensor<int32, [4]>([1, 768, 1, -1])];
+            tensor<fp16, [1, 768, 1, 1]> input_81_cast_fp16 = reshape(shape = var_2483, x = attn_33_cast_fp16)[name = string("input_81_cast_fp16")];
+            string var_2493_pad_type_0 = const()[name = string("op_2493_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2493_strides_0 = const()[name = string("op_2493_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2493_pad_0 = const()[name = string("op_2493_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2493_dilations_0 = const()[name = string("op_2493_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2493_groups_0 = const()[name = string("op_2493_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_8_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(124468224))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(124763200))))[name = string("layers_8_self_attn_o_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_8_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_8_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(124763328)))];
+            tensor<fp16, [1, 768, 1, 1]> var_2493_cast_fp16 = conv(bias = layers_8_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_2493_dilations_0, groups = var_2493_groups_0, pad = var_2493_pad_0, pad_type = var_2493_pad_type_0, strides = var_2493_strides_0, weight = layers_8_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_81_cast_fp16)[name = string("op_2493_cast_fp16")];
+            string var_2499_pad_type_0 = const()[name = string("op_2499_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2499_strides_0 = const()[name = string("op_2499_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2499_pad_0 = const()[name = string("op_2499_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2499_dilations_0 = const()[name = string("op_2499_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2499_groups_0 = const()[name = string("op_2499_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_8_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(124771328))), nonzero_data = tensor<fp16, [3147]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(124764928))))[name = string("layers_8_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1]> var_2499_cast_fp16 = conv(dilations = var_2499_dilations_0, groups = var_2499_groups_0, pad = var_2499_pad_0, pad_type = var_2499_pad_type_0, strides = var_2499_strides_0, weight = layers_8_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_81_cast_fp16)[name = string("op_2499_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> obj_157_cast_fp16 = add(x = var_2493_cast_fp16, y = var_2499_cast_fp16)[name = string("obj_157_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> inputs_51_cast_fp16 = add(x = inputs_49_cast_fp16, y = obj_157_cast_fp16)[name = string("inputs_51_cast_fp16")];
+            tensor<int32, [1]> out_51_axes_0 = const()[name = string("out_51_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_2514_to_fp16 = const()[name = string("op_2514_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> out_51_cast_fp16 = layer_norm(axes = out_51_axes_0, epsilon = var_2514_to_fp16, x = inputs_51_cast_fp16)[name = string("out_51_cast_fp16")];
+            tensor<fp16, [768]> obj_159_gamma_0_to_fp16 = const()[name = string("obj_159_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(124845120)))];
+            tensor<fp16, [768]> obj_159_beta_0_to_fp16 = const()[name = string("obj_159_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(124846720)))];
+            fp16 obj_159_epsilon_0_to_fp16 = const()[name = string("obj_159_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> obj_159_cast_fp16 = batch_norm(beta = obj_159_beta_0_to_fp16, epsilon = obj_159_epsilon_0_to_fp16, gamma = obj_159_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_51_cast_fp16)[name = string("obj_159_cast_fp16")];
+            string var_2534_pad_type_0 = const()[name = string("op_2534_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2534_strides_0 = const()[name = string("op_2534_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2534_pad_0 = const()[name = string("op_2534_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2534_dilations_0 = const()[name = string("op_2534_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2534_groups_0 = const()[name = string("op_2534_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_8_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(124848320))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(125143296))))[name = string("layers_8_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_8_encoder_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_8_encoder_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(125143424)))];
+            tensor<fp16, [1, 768, 1, 1]> var_2534_cast_fp16 = conv(bias = layers_8_encoder_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_2534_dilations_0, groups = var_2534_groups_0, pad = var_2534_pad_0, pad_type = var_2534_pad_type_0, strides = var_2534_strides_0, weight = layers_8_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_159_cast_fp16)[name = string("op_2534_cast_fp16")];
+            string var_2540_pad_type_0 = const()[name = string("op_2540_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2540_strides_0 = const()[name = string("op_2540_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2540_pad_0 = const()[name = string("op_2540_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2540_dilations_0 = const()[name = string("op_2540_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2540_groups_0 = const()[name = string("op_2540_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_8_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(125150656))), nonzero_data = tensor<fp16, [2767]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(125145024))))[name = string("layers_8_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1]> var_2540_cast_fp16 = conv(dilations = var_2540_dilations_0, groups = var_2540_groups_0, pad = var_2540_pad_0, pad_type = var_2540_pad_type_0, strides = var_2540_strides_0, weight = layers_8_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_159_cast_fp16)[name = string("op_2540_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> query_35_cast_fp16 = add(x = var_2534_cast_fp16, y = var_2540_cast_fp16)[name = string("query_35_cast_fp16")];
+            tensor<int32, [4]> var_2543 = const()[name = string("op_2543"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1]> mh_q_35_cast_fp16 = reshape(shape = var_2543, x = query_35_cast_fp16)[name = string("mh_q_35_cast_fp16")];
+            fp16 var_2545_to_fp16 = const()[name = string("op_2545_to_fp16"), val = fp16(0x1p-3)];
+            tensor<fp16, [1, 12, 64, 1]> var_2546_cast_fp16 = mul(x = mh_q_35_cast_fp16, y = var_2545_to_fp16)[name = string("op_2546_cast_fp16")];
+            tensor<int32, [4]> var_2547 = const()[name = string("op_2547"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1536]> var_2548_cast_fp16 = reshape(shape = var_2547, x = obj_161_cast_fp16)[name = string("op_2548_cast_fp16")];
+            bool mh_w_69_transpose_x_0 = const()[name = string("mh_w_69_transpose_x_0"), val = bool(true)];
+            bool mh_w_69_transpose_y_0 = const()[name = string("mh_w_69_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 12, 1, 1536]> mh_w_69_cast_fp16 = matmul(transpose_x = mh_w_69_transpose_x_0, transpose_y = mh_w_69_transpose_y_0, x = var_2546_cast_fp16, y = var_2548_cast_fp16)[name = string("mh_w_69_cast_fp16")];
+            tensor<fp16, [1, 12, 1, 1536]> mh_w_71_cast_fp16 = add(x = mh_w_69_cast_fp16, y = var_297_cast_fp16)[name = string("mh_w_71_cast_fp16")];
+            tensor<fp16, [1, 12, 1, 1536]> obj_167_cast_fp16 = softmax(axis = var_2370, x = mh_w_71_cast_fp16)[name = string("obj_167_cast_fp16")];
+            tensor<int32, [4]> var_2557 = const()[name = string("op_2557"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1536]> var_2558_cast_fp16 = reshape(shape = var_2557, x = obj_163_cast_fp16)[name = string("op_2558_cast_fp16")];
+            bool attn_35_transpose_x_0 = const()[name = string("attn_35_transpose_x_0"), val = bool(false)];
+            bool attn_35_transpose_y_0 = const()[name = string("attn_35_transpose_y_0"), val = bool(true)];
+            tensor<fp16, [1, 12, 64, 1]> attn_35_cast_fp16 = matmul(transpose_x = attn_35_transpose_x_0, transpose_y = attn_35_transpose_y_0, x = var_2558_cast_fp16, y = obj_167_cast_fp16)[name = string("attn_35_cast_fp16")];
+            tensor<int32, [4]> var_2561 = const()[name = string("op_2561"), val = tensor<int32, [4]>([1, 768, 1, -1])];
+            tensor<fp16, [1, 768, 1, 1]> input_83_cast_fp16 = reshape(shape = var_2561, x = attn_35_cast_fp16)[name = string("input_83_cast_fp16")];
+            string var_2571_pad_type_0 = const()[name = string("op_2571_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2571_strides_0 = const()[name = string("op_2571_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2571_pad_0 = const()[name = string("op_2571_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2571_dilations_0 = const()[name = string("op_2571_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2571_groups_0 = const()[name = string("op_2571_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_8_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(125224448))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(125519424))))[name = string("layers_8_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_8_encoder_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_8_encoder_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(125519552)))];
+            tensor<fp16, [1, 768, 1, 1]> var_2571_cast_fp16 = conv(bias = layers_8_encoder_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_2571_dilations_0, groups = var_2571_groups_0, pad = var_2571_pad_0, pad_type = var_2571_pad_type_0, strides = var_2571_strides_0, weight = layers_8_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_83_cast_fp16)[name = string("op_2571_cast_fp16")];
+            string var_2577_pad_type_0 = const()[name = string("op_2577_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2577_strides_0 = const()[name = string("op_2577_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2577_pad_0 = const()[name = string("op_2577_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2577_dilations_0 = const()[name = string("op_2577_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2577_groups_0 = const()[name = string("op_2577_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_8_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(125526592))), nonzero_data = tensor<fp16, [2686]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(125521152))))[name = string("layers_8_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1]> var_2577_cast_fp16 = conv(dilations = var_2577_dilations_0, groups = var_2577_groups_0, pad = var_2577_pad_0, pad_type = var_2577_pad_type_0, strides = var_2577_strides_0, weight = layers_8_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_83_cast_fp16)[name = string("op_2577_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> obj_165_cast_fp16 = add(x = var_2571_cast_fp16, y = var_2577_cast_fp16)[name = string("obj_165_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> inputs_53_cast_fp16 = add(x = inputs_51_cast_fp16, y = obj_165_cast_fp16)[name = string("inputs_53_cast_fp16")];
+            tensor<int32, [1]> out_53_axes_0 = const()[name = string("out_53_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_2591_to_fp16 = const()[name = string("op_2591_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> out_53_cast_fp16 = layer_norm(axes = out_53_axes_0, epsilon = var_2591_to_fp16, x = inputs_53_cast_fp16)[name = string("out_53_cast_fp16")];
+            tensor<fp16, [768]> input_85_gamma_0_to_fp16 = const()[name = string("input_85_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(125600384)))];
+            tensor<fp16, [768]> input_85_beta_0_to_fp16 = const()[name = string("input_85_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(125601984)))];
+            fp16 input_85_epsilon_0_to_fp16 = const()[name = string("input_85_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> input_85_cast_fp16 = batch_norm(beta = input_85_beta_0_to_fp16, epsilon = input_85_epsilon_0_to_fp16, gamma = input_85_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_53_cast_fp16)[name = string("input_85_cast_fp16")];
+            string var_2609_pad_type_0 = const()[name = string("op_2609_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2609_strides_0 = const()[name = string("op_2609_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2609_pad_0 = const()[name = string("op_2609_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2609_dilations_0 = const()[name = string("op_2609_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2609_groups_0 = const()[name = string("op_2609_groups_0"), val = int32(1)];
+            tensor<fp16, [3072, 768, 1, 1]> layers_8_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [3072, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(125603584))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(126783296))))[name = string("layers_8_fc1_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [3072]> layers_8_fc1_inlier_module_bias_to_fp16 = const()[name = string("layers_8_fc1_inlier_module_bias_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(126783424)))];
+            tensor<fp16, [1, 3072, 1, 1]> var_2609_cast_fp16 = conv(bias = layers_8_fc1_inlier_module_bias_to_fp16, dilations = var_2609_dilations_0, groups = var_2609_groups_0, pad = var_2609_pad_0, pad_type = var_2609_pad_type_0, strides = var_2609_strides_0, weight = layers_8_fc1_inlier_module_weight_to_fp16_palettized, x = input_85_cast_fp16)[name = string("op_2609_cast_fp16")];
+            string var_2615_pad_type_0 = const()[name = string("op_2615_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2615_strides_0 = const()[name = string("op_2615_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2615_pad_0 = const()[name = string("op_2615_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2615_dilations_0 = const()[name = string("op_2615_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2615_groups_0 = const()[name = string("op_2615_groups_0"), val = int32(1)];
+            tensor<fp16, [3072, 768, 1, 1]> layers_8_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [3072, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(126807552))), nonzero_data = tensor<fp16, [8899]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(126789632))))[name = string("layers_8_fc1_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 3072, 1, 1]> var_2615_cast_fp16 = conv(dilations = var_2615_dilations_0, groups = var_2615_groups_0, pad = var_2615_pad_0, pad_type = var_2615_pad_type_0, strides = var_2615_strides_0, weight = layers_8_fc1_outlier_module_weight_to_fp16_sparsified, x = input_85_cast_fp16)[name = string("op_2615_cast_fp16")];
+            tensor<fp16, [1, 3072, 1, 1]> input_87_cast_fp16 = add(x = var_2609_cast_fp16, y = var_2615_cast_fp16)[name = string("input_87_cast_fp16")];
+            string input_89_mode_0 = const()[name = string("input_89_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 3072, 1, 1]> input_89_cast_fp16 = gelu(mode = input_89_mode_0, x = input_87_cast_fp16)[name = string("input_89_cast_fp16")];
+            string var_2626_pad_type_0 = const()[name = string("op_2626_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2626_strides_0 = const()[name = string("op_2626_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2626_pad_0 = const()[name = string("op_2626_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2626_dilations_0 = const()[name = string("op_2626_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2626_groups_0 = const()[name = string("op_2626_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 3072, 1, 1]> layers_8_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(127102528))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(128282240))))[name = string("layers_8_fc2_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_8_fc2_inlier_module_bias_to_fp16 = const()[name = string("layers_8_fc2_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(128282368)))];
+            tensor<fp16, [1, 768, 1, 1]> var_2626_cast_fp16 = conv(bias = layers_8_fc2_inlier_module_bias_to_fp16, dilations = var_2626_dilations_0, groups = var_2626_groups_0, pad = var_2626_pad_0, pad_type = var_2626_pad_type_0, strides = var_2626_strides_0, weight = layers_8_fc2_inlier_module_weight_to_fp16_palettized, x = input_89_cast_fp16)[name = string("op_2626_cast_fp16")];
+            string var_2632_pad_type_0 = const()[name = string("op_2632_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2632_strides_0 = const()[name = string("op_2632_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2632_pad_0 = const()[name = string("op_2632_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2632_dilations_0 = const()[name = string("op_2632_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2632_groups_0 = const()[name = string("op_2632_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 3072, 1, 1]> layers_8_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(128308096))), nonzero_data = tensor<fp16, [12025]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(128283968))))[name = string("layers_8_fc2_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1]> var_2632_cast_fp16 = conv(dilations = var_2632_dilations_0, groups = var_2632_groups_0, pad = var_2632_pad_0, pad_type = var_2632_pad_type_0, strides = var_2632_strides_0, weight = layers_8_fc2_outlier_module_weight_to_fp16_sparsified, x = input_89_cast_fp16)[name = string("op_2632_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> hidden_states_19_cast_fp16 = add(x = var_2626_cast_fp16, y = var_2632_cast_fp16)[name = string("hidden_states_19_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> inputs_55_cast_fp16 = add(x = inputs_53_cast_fp16, y = hidden_states_19_cast_fp16)[name = string("inputs_55_cast_fp16")];
+            tensor<int32, [4]> obj_179_begin_0 = const()[name = string("obj_179_begin_0"), val = tensor<int32, [4]>([9, 0, 0, 0])];
+            tensor<int32, [4]> obj_179_end_0 = const()[name = string("obj_179_end_0"), val = tensor<int32, [4]>([10, 768, 1, 1536])];
+            tensor<bool, [4]> obj_179_end_mask_0 = const()[name = string("obj_179_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<fp16, [1, 768, 1, 1536]> obj_179_cast_fp16 = slice_by_index(begin = obj_179_begin_0, end = obj_179_end_0, end_mask = obj_179_end_mask_0, x = read_state_2)[name = string("obj_179_cast_fp16")];
+            tensor<int32, [4]> obj_181_begin_0 = const()[name = string("obj_181_begin_0"), val = tensor<int32, [4]>([9, 0, 0, 0])];
+            tensor<int32, [4]> obj_181_end_0 = const()[name = string("obj_181_end_0"), val = tensor<int32, [4]>([10, 768, 1, 1536])];
+            tensor<bool, [4]> obj_181_end_mask_0 = const()[name = string("obj_181_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<fp16, [1, 768, 1, 1536]> obj_181_cast_fp16 = slice_by_index(begin = obj_181_begin_0, end = obj_181_end_0, end_mask = obj_181_end_mask_0, x = read_state_3)[name = string("obj_181_cast_fp16")];
+            int32 var_2655 = const()[name = string("op_2655"), val = int32(3)];
+            tensor<int32, [1]> out_55_axes_0 = const()[name = string("out_55_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_2680_to_fp16 = const()[name = string("op_2680_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> out_55_cast_fp16 = layer_norm(axes = out_55_axes_0, epsilon = var_2680_to_fp16, x = inputs_55_cast_fp16)[name = string("out_55_cast_fp16")];
+            tensor<fp16, [768]> obj_169_gamma_0_to_fp16 = const()[name = string("obj_169_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(128603072)))];
+            tensor<fp16, [768]> obj_169_beta_0_to_fp16 = const()[name = string("obj_169_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(128604672)))];
+            fp16 obj_169_epsilon_0_to_fp16 = const()[name = string("obj_169_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> obj_169_cast_fp16 = batch_norm(beta = obj_169_beta_0_to_fp16, epsilon = obj_169_epsilon_0_to_fp16, gamma = obj_169_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_55_cast_fp16)[name = string("obj_169_cast_fp16")];
+            string var_2702_pad_type_0 = const()[name = string("op_2702_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2702_strides_0 = const()[name = string("op_2702_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2702_pad_0 = const()[name = string("op_2702_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2702_dilations_0 = const()[name = string("op_2702_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2702_groups_0 = const()[name = string("op_2702_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_9_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(128606272))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(128901248))))[name = string("layers_9_self_attn_q_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_9_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_9_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(128901376)))];
+            tensor<fp16, [1, 768, 1, 1]> var_2702_cast_fp16 = conv(bias = layers_9_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_2702_dilations_0, groups = var_2702_groups_0, pad = var_2702_pad_0, pad_type = var_2702_pad_type_0, strides = var_2702_strides_0, weight = layers_9_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_169_cast_fp16)[name = string("op_2702_cast_fp16")];
+            string var_2708_pad_type_0 = const()[name = string("op_2708_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2708_strides_0 = const()[name = string("op_2708_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2708_pad_0 = const()[name = string("op_2708_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2708_dilations_0 = const()[name = string("op_2708_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2708_groups_0 = const()[name = string("op_2708_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_9_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(128908288))), nonzero_data = tensor<fp16, [2599]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(128902976))))[name = string("layers_9_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1]> var_2708_cast_fp16 = conv(dilations = var_2708_dilations_0, groups = var_2708_groups_0, pad = var_2708_pad_0, pad_type = var_2708_pad_type_0, strides = var_2708_strides_0, weight = layers_9_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_169_cast_fp16)[name = string("op_2708_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> query_37_cast_fp16 = add(x = var_2702_cast_fp16, y = var_2708_cast_fp16)[name = string("query_37_cast_fp16")];
+            string var_2717_pad_type_0 = const()[name = string("op_2717_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2717_strides_0 = const()[name = string("op_2717_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2717_pad_0 = const()[name = string("op_2717_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2717_dilations_0 = const()[name = string("op_2717_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2717_groups_0 = const()[name = string("op_2717_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_9_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(128982080))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(129277056))))[name = string("layers_9_self_attn_k_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1, 768, 1, 1]> var_2717_cast_fp16 = conv(dilations = var_2717_dilations_0, groups = var_2717_groups_0, pad = var_2717_pad_0, pad_type = var_2717_pad_type_0, strides = var_2717_strides_0, weight = layers_9_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_169_cast_fp16)[name = string("op_2717_cast_fp16")];
+            string var_2723_pad_type_0 = const()[name = string("op_2723_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2723_strides_0 = const()[name = string("op_2723_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2723_pad_0 = const()[name = string("op_2723_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2723_dilations_0 = const()[name = string("op_2723_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2723_groups_0 = const()[name = string("op_2723_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_9_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(129282624))), nonzero_data = tensor<fp16, [2660]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(129277184))))[name = string("layers_9_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1]> var_2723_cast_fp16 = conv(dilations = var_2723_dilations_0, groups = var_2723_groups_0, pad = var_2723_pad_0, pad_type = var_2723_pad_type_0, strides = var_2723_strides_0, weight = layers_9_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_169_cast_fp16)[name = string("op_2723_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> current_key_19_cast_fp16 = add(x = var_2717_cast_fp16, y = var_2723_cast_fp16)[name = string("current_key_19_cast_fp16")];
+            string var_2733_pad_type_0 = const()[name = string("op_2733_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2733_strides_0 = const()[name = string("op_2733_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2733_pad_0 = const()[name = string("op_2733_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2733_dilations_0 = const()[name = string("op_2733_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2733_groups_0 = const()[name = string("op_2733_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_9_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(129356416))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(129651392))))[name = string("layers_9_self_attn_v_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_9_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_9_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(129651520)))];
+            tensor<fp16, [1, 768, 1, 1]> var_2733_cast_fp16 = conv(bias = layers_9_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_2733_dilations_0, groups = var_2733_groups_0, pad = var_2733_pad_0, pad_type = var_2733_pad_type_0, strides = var_2733_strides_0, weight = layers_9_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_169_cast_fp16)[name = string("op_2733_cast_fp16")];
+            string var_2739_pad_type_0 = const()[name = string("op_2739_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2739_strides_0 = const()[name = string("op_2739_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2739_pad_0 = const()[name = string("op_2739_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2739_dilations_0 = const()[name = string("op_2739_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2739_groups_0 = const()[name = string("op_2739_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_9_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(129659840))), nonzero_data = tensor<fp16, [3325]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(129653120))))[name = string("layers_9_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1]> var_2739_cast_fp16 = conv(dilations = var_2739_dilations_0, groups = var_2739_groups_0, pad = var_2739_pad_0, pad_type = var_2739_pad_type_0, strides = var_2739_strides_0, weight = layers_9_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_169_cast_fp16)[name = string("op_2739_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> current_value_19_cast_fp16 = add(x = var_2733_cast_fp16, y = var_2739_cast_fp16)[name = string("current_value_19_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 448]> var_2745_cast_fp16 = mul(x = current_key_19_cast_fp16, y = var_202_cast_fp16)[name = string("op_2745_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 448]> key_19_cast_fp16 = add(x = var_71_cast_fp16_9, y = var_2745_cast_fp16)[name = string("key_19_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 448]> var_2747_cast_fp16 = mul(x = current_value_19_cast_fp16, y = var_202_cast_fp16)[name = string("op_2747_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 448]> value_19_cast_fp16 = add(x = var_86_cast_fp16_9, y = var_2747_cast_fp16)[name = string("value_19_cast_fp16")];
+            tensor<int32, [4]> var_2750 = const()[name = string("op_2750"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1]> mh_q_37_cast_fp16 = reshape(shape = var_2750, x = query_37_cast_fp16)[name = string("mh_q_37_cast_fp16")];
+            fp16 var_2752_to_fp16 = const()[name = string("op_2752_to_fp16"), val = fp16(0x1p-3)];
+            tensor<fp16, [1, 12, 64, 1]> var_2753_cast_fp16 = mul(x = mh_q_37_cast_fp16, y = var_2752_to_fp16)[name = string("op_2753_cast_fp16")];
+            tensor<int32, [4]> var_2754 = const()[name = string("op_2754"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 448]> var_2755_cast_fp16 = reshape(shape = var_2754, x = key_19_cast_fp16)[name = string("op_2755_cast_fp16")];
+            bool mh_w_73_transpose_x_0 = const()[name = string("mh_w_73_transpose_x_0"), val = bool(true)];
+            bool mh_w_73_transpose_y_0 = const()[name = string("mh_w_73_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 12, 1, 448]> mh_w_73_cast_fp16 = matmul(transpose_x = mh_w_73_transpose_x_0, transpose_y = mh_w_73_transpose_y_0, x = var_2753_cast_fp16, y = var_2755_cast_fp16)[name = string("mh_w_73_cast_fp16")];
+            tensor<fp16, [1, 12, 1, 448]> mh_w_75_cast_fp16 = add(x = mh_w_73_cast_fp16, y = var_219_cast_fp16)[name = string("mh_w_75_cast_fp16")];
+            tensor<fp16, [1, 12, 1, 448]> var_2763_cast_fp16 = softmax(axis = var_2655, x = mh_w_75_cast_fp16)[name = string("op_2763_cast_fp16")];
+            tensor<int32, [4]> var_2764 = const()[name = string("op_2764"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 448]> var_2765_cast_fp16 = reshape(shape = var_2764, x = value_19_cast_fp16)[name = string("op_2765_cast_fp16")];
+            bool attn_37_transpose_x_0 = const()[name = string("attn_37_transpose_x_0"), val = bool(false)];
+            bool attn_37_transpose_y_0 = const()[name = string("attn_37_transpose_y_0"), val = bool(true)];
+            tensor<fp16, [1, 12, 64, 1]> attn_37_cast_fp16 = matmul(transpose_x = attn_37_transpose_x_0, transpose_y = attn_37_transpose_y_0, x = var_2765_cast_fp16, y = var_2763_cast_fp16)[name = string("attn_37_cast_fp16")];
+            tensor<int32, [4]> var_2768 = const()[name = string("op_2768"), val = tensor<int32, [4]>([1, 768, 1, -1])];
+            tensor<fp16, [1, 768, 1, 1]> input_91_cast_fp16 = reshape(shape = var_2768, x = attn_37_cast_fp16)[name = string("input_91_cast_fp16")];
+            string var_2778_pad_type_0 = const()[name = string("op_2778_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2778_strides_0 = const()[name = string("op_2778_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2778_pad_0 = const()[name = string("op_2778_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2778_dilations_0 = const()[name = string("op_2778_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2778_groups_0 = const()[name = string("op_2778_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_9_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(129733632))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(130028608))))[name = string("layers_9_self_attn_o_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_9_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_9_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(130028736)))];
+            tensor<fp16, [1, 768, 1, 1]> var_2778_cast_fp16 = conv(bias = layers_9_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_2778_dilations_0, groups = var_2778_groups_0, pad = var_2778_pad_0, pad_type = var_2778_pad_type_0, strides = var_2778_strides_0, weight = layers_9_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_91_cast_fp16)[name = string("op_2778_cast_fp16")];
+            string var_2784_pad_type_0 = const()[name = string("op_2784_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2784_strides_0 = const()[name = string("op_2784_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2784_pad_0 = const()[name = string("op_2784_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2784_dilations_0 = const()[name = string("op_2784_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2784_groups_0 = const()[name = string("op_2784_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_9_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(130036928))), nonzero_data = tensor<fp16, [3236]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(130030336))))[name = string("layers_9_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1]> var_2784_cast_fp16 = conv(dilations = var_2784_dilations_0, groups = var_2784_groups_0, pad = var_2784_pad_0, pad_type = var_2784_pad_type_0, strides = var_2784_strides_0, weight = layers_9_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_91_cast_fp16)[name = string("op_2784_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> obj_175_cast_fp16 = add(x = var_2778_cast_fp16, y = var_2784_cast_fp16)[name = string("obj_175_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> inputs_57_cast_fp16 = add(x = inputs_55_cast_fp16, y = obj_175_cast_fp16)[name = string("inputs_57_cast_fp16")];
+            tensor<int32, [1]> out_57_axes_0 = const()[name = string("out_57_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_2799_to_fp16 = const()[name = string("op_2799_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> out_57_cast_fp16 = layer_norm(axes = out_57_axes_0, epsilon = var_2799_to_fp16, x = inputs_57_cast_fp16)[name = string("out_57_cast_fp16")];
+            tensor<fp16, [768]> obj_177_gamma_0_to_fp16 = const()[name = string("obj_177_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(130110720)))];
+            tensor<fp16, [768]> obj_177_beta_0_to_fp16 = const()[name = string("obj_177_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(130112320)))];
+            fp16 obj_177_epsilon_0_to_fp16 = const()[name = string("obj_177_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> obj_177_cast_fp16 = batch_norm(beta = obj_177_beta_0_to_fp16, epsilon = obj_177_epsilon_0_to_fp16, gamma = obj_177_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_57_cast_fp16)[name = string("obj_177_cast_fp16")];
+            string var_2819_pad_type_0 = const()[name = string("op_2819_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2819_strides_0 = const()[name = string("op_2819_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2819_pad_0 = const()[name = string("op_2819_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2819_dilations_0 = const()[name = string("op_2819_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2819_groups_0 = const()[name = string("op_2819_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_9_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(130113920))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(130408896))))[name = string("layers_9_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_9_encoder_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_9_encoder_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(130409024)))];
+            tensor<fp16, [1, 768, 1, 1]> var_2819_cast_fp16 = conv(bias = layers_9_encoder_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_2819_dilations_0, groups = var_2819_groups_0, pad = var_2819_pad_0, pad_type = var_2819_pad_type_0, strides = var_2819_strides_0, weight = layers_9_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_177_cast_fp16)[name = string("op_2819_cast_fp16")];
+            string var_2825_pad_type_0 = const()[name = string("op_2825_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2825_strides_0 = const()[name = string("op_2825_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2825_pad_0 = const()[name = string("op_2825_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2825_dilations_0 = const()[name = string("op_2825_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2825_groups_0 = const()[name = string("op_2825_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_9_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(130415744))), nonzero_data = tensor<fp16, [2497]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(130410624))))[name = string("layers_9_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1]> var_2825_cast_fp16 = conv(dilations = var_2825_dilations_0, groups = var_2825_groups_0, pad = var_2825_pad_0, pad_type = var_2825_pad_type_0, strides = var_2825_strides_0, weight = layers_9_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_177_cast_fp16)[name = string("op_2825_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> query_39_cast_fp16 = add(x = var_2819_cast_fp16, y = var_2825_cast_fp16)[name = string("query_39_cast_fp16")];
+            tensor<int32, [4]> var_2828 = const()[name = string("op_2828"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1]> mh_q_39_cast_fp16 = reshape(shape = var_2828, x = query_39_cast_fp16)[name = string("mh_q_39_cast_fp16")];
+            fp16 var_2830_to_fp16 = const()[name = string("op_2830_to_fp16"), val = fp16(0x1p-3)];
+            tensor<fp16, [1, 12, 64, 1]> var_2831_cast_fp16 = mul(x = mh_q_39_cast_fp16, y = var_2830_to_fp16)[name = string("op_2831_cast_fp16")];
+            tensor<int32, [4]> var_2832 = const()[name = string("op_2832"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1536]> var_2833_cast_fp16 = reshape(shape = var_2832, x = obj_179_cast_fp16)[name = string("op_2833_cast_fp16")];
+            bool mh_w_77_transpose_x_0 = const()[name = string("mh_w_77_transpose_x_0"), val = bool(true)];
+            bool mh_w_77_transpose_y_0 = const()[name = string("mh_w_77_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 12, 1, 1536]> mh_w_77_cast_fp16 = matmul(transpose_x = mh_w_77_transpose_x_0, transpose_y = mh_w_77_transpose_y_0, x = var_2831_cast_fp16, y = var_2833_cast_fp16)[name = string("mh_w_77_cast_fp16")];
+            tensor<fp16, [1, 12, 1, 1536]> mh_w_79_cast_fp16 = add(x = mh_w_77_cast_fp16, y = var_297_cast_fp16)[name = string("mh_w_79_cast_fp16")];
+            tensor<fp16, [1, 12, 1, 1536]> obj_185_cast_fp16 = softmax(axis = var_2655, x = mh_w_79_cast_fp16)[name = string("obj_185_cast_fp16")];
+            tensor<int32, [4]> var_2842 = const()[name = string("op_2842"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1536]> var_2843_cast_fp16 = reshape(shape = var_2842, x = obj_181_cast_fp16)[name = string("op_2843_cast_fp16")];
+            bool attn_39_transpose_x_0 = const()[name = string("attn_39_transpose_x_0"), val = bool(false)];
+            bool attn_39_transpose_y_0 = const()[name = string("attn_39_transpose_y_0"), val = bool(true)];
+            tensor<fp16, [1, 12, 64, 1]> attn_39_cast_fp16 = matmul(transpose_x = attn_39_transpose_x_0, transpose_y = attn_39_transpose_y_0, x = var_2843_cast_fp16, y = obj_185_cast_fp16)[name = string("attn_39_cast_fp16")];
+            tensor<int32, [4]> var_2846 = const()[name = string("op_2846"), val = tensor<int32, [4]>([1, 768, 1, -1])];
+            tensor<fp16, [1, 768, 1, 1]> input_93_cast_fp16 = reshape(shape = var_2846, x = attn_39_cast_fp16)[name = string("input_93_cast_fp16")];
+            string var_2856_pad_type_0 = const()[name = string("op_2856_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2856_strides_0 = const()[name = string("op_2856_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2856_pad_0 = const()[name = string("op_2856_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2856_dilations_0 = const()[name = string("op_2856_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2856_groups_0 = const()[name = string("op_2856_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_9_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(130489536))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(130784512))))[name = string("layers_9_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_9_encoder_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_9_encoder_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(130784640)))];
+            tensor<fp16, [1, 768, 1, 1]> var_2856_cast_fp16 = conv(bias = layers_9_encoder_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_2856_dilations_0, groups = var_2856_groups_0, pad = var_2856_pad_0, pad_type = var_2856_pad_type_0, strides = var_2856_strides_0, weight = layers_9_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_93_cast_fp16)[name = string("op_2856_cast_fp16")];
+            string var_2862_pad_type_0 = const()[name = string("op_2862_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2862_strides_0 = const()[name = string("op_2862_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2862_pad_0 = const()[name = string("op_2862_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2862_dilations_0 = const()[name = string("op_2862_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2862_groups_0 = const()[name = string("op_2862_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_9_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(130791680))), nonzero_data = tensor<fp16, [2682]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(130786240))))[name = string("layers_9_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1]> var_2862_cast_fp16 = conv(dilations = var_2862_dilations_0, groups = var_2862_groups_0, pad = var_2862_pad_0, pad_type = var_2862_pad_type_0, strides = var_2862_strides_0, weight = layers_9_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_93_cast_fp16)[name = string("op_2862_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> obj_183_cast_fp16 = add(x = var_2856_cast_fp16, y = var_2862_cast_fp16)[name = string("obj_183_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> inputs_59_cast_fp16 = add(x = inputs_57_cast_fp16, y = obj_183_cast_fp16)[name = string("inputs_59_cast_fp16")];
+            tensor<int32, [1]> out_59_axes_0 = const()[name = string("out_59_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_2876_to_fp16 = const()[name = string("op_2876_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> out_59_cast_fp16 = layer_norm(axes = out_59_axes_0, epsilon = var_2876_to_fp16, x = inputs_59_cast_fp16)[name = string("out_59_cast_fp16")];
+            tensor<fp16, [768]> input_95_gamma_0_to_fp16 = const()[name = string("input_95_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(130865472)))];
+            tensor<fp16, [768]> input_95_beta_0_to_fp16 = const()[name = string("input_95_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(130867072)))];
+            fp16 input_95_epsilon_0_to_fp16 = const()[name = string("input_95_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> input_95_cast_fp16 = batch_norm(beta = input_95_beta_0_to_fp16, epsilon = input_95_epsilon_0_to_fp16, gamma = input_95_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_59_cast_fp16)[name = string("input_95_cast_fp16")];
+            string var_2894_pad_type_0 = const()[name = string("op_2894_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2894_strides_0 = const()[name = string("op_2894_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2894_pad_0 = const()[name = string("op_2894_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2894_dilations_0 = const()[name = string("op_2894_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2894_groups_0 = const()[name = string("op_2894_groups_0"), val = int32(1)];
+            tensor<fp16, [3072, 768, 1, 1]> layers_9_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [3072, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(130868672))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(132048384))))[name = string("layers_9_fc1_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [3072]> layers_9_fc1_inlier_module_bias_to_fp16 = const()[name = string("layers_9_fc1_inlier_module_bias_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(132048512)))];
+            tensor<fp16, [1, 3072, 1, 1]> var_2894_cast_fp16 = conv(bias = layers_9_fc1_inlier_module_bias_to_fp16, dilations = var_2894_dilations_0, groups = var_2894_groups_0, pad = var_2894_pad_0, pad_type = var_2894_pad_type_0, strides = var_2894_strides_0, weight = layers_9_fc1_inlier_module_weight_to_fp16_palettized, x = input_95_cast_fp16)[name = string("op_2894_cast_fp16")];
+            string var_2900_pad_type_0 = const()[name = string("op_2900_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2900_strides_0 = const()[name = string("op_2900_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2900_pad_0 = const()[name = string("op_2900_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2900_dilations_0 = const()[name = string("op_2900_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2900_groups_0 = const()[name = string("op_2900_groups_0"), val = int32(1)];
+            tensor<fp16, [3072, 768, 1, 1]> layers_9_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [3072, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(132072448))), nonzero_data = tensor<fp16, [8807]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(132054720))))[name = string("layers_9_fc1_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 3072, 1, 1]> var_2900_cast_fp16 = conv(dilations = var_2900_dilations_0, groups = var_2900_groups_0, pad = var_2900_pad_0, pad_type = var_2900_pad_type_0, strides = var_2900_strides_0, weight = layers_9_fc1_outlier_module_weight_to_fp16_sparsified, x = input_95_cast_fp16)[name = string("op_2900_cast_fp16")];
+            tensor<fp16, [1, 3072, 1, 1]> input_97_cast_fp16 = add(x = var_2894_cast_fp16, y = var_2900_cast_fp16)[name = string("input_97_cast_fp16")];
+            string input_99_mode_0 = const()[name = string("input_99_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 3072, 1, 1]> input_99_cast_fp16 = gelu(mode = input_99_mode_0, x = input_97_cast_fp16)[name = string("input_99_cast_fp16")];
+            string var_2911_pad_type_0 = const()[name = string("op_2911_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2911_strides_0 = const()[name = string("op_2911_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2911_pad_0 = const()[name = string("op_2911_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2911_dilations_0 = const()[name = string("op_2911_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2911_groups_0 = const()[name = string("op_2911_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 3072, 1, 1]> layers_9_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(132367424))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(133547136))))[name = string("layers_9_fc2_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_9_fc2_inlier_module_bias_to_fp16 = const()[name = string("layers_9_fc2_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(133547264)))];
+            tensor<fp16, [1, 768, 1, 1]> var_2911_cast_fp16 = conv(bias = layers_9_fc2_inlier_module_bias_to_fp16, dilations = var_2911_dilations_0, groups = var_2911_groups_0, pad = var_2911_pad_0, pad_type = var_2911_pad_type_0, strides = var_2911_strides_0, weight = layers_9_fc2_inlier_module_weight_to_fp16_palettized, x = input_99_cast_fp16)[name = string("op_2911_cast_fp16")];
+            string var_2917_pad_type_0 = const()[name = string("op_2917_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2917_strides_0 = const()[name = string("op_2917_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2917_pad_0 = const()[name = string("op_2917_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2917_dilations_0 = const()[name = string("op_2917_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2917_groups_0 = const()[name = string("op_2917_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 3072, 1, 1]> layers_9_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(133574720))), nonzero_data = tensor<fp16, [12893]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(133548864))))[name = string("layers_9_fc2_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1]> var_2917_cast_fp16 = conv(dilations = var_2917_dilations_0, groups = var_2917_groups_0, pad = var_2917_pad_0, pad_type = var_2917_pad_type_0, strides = var_2917_strides_0, weight = layers_9_fc2_outlier_module_weight_to_fp16_sparsified, x = input_99_cast_fp16)[name = string("op_2917_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> hidden_states_21_cast_fp16 = add(x = var_2911_cast_fp16, y = var_2917_cast_fp16)[name = string("hidden_states_21_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> inputs_61_cast_fp16 = add(x = inputs_59_cast_fp16, y = hidden_states_21_cast_fp16)[name = string("inputs_61_cast_fp16")];
+            tensor<int32, [4]> obj_197_begin_0 = const()[name = string("obj_197_begin_0"), val = tensor<int32, [4]>([10, 0, 0, 0])];
+            tensor<int32, [4]> obj_197_end_0 = const()[name = string("obj_197_end_0"), val = tensor<int32, [4]>([11, 768, 1, 1536])];
+            tensor<bool, [4]> obj_197_end_mask_0 = const()[name = string("obj_197_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<fp16, [1, 768, 1, 1536]> obj_197_cast_fp16 = slice_by_index(begin = obj_197_begin_0, end = obj_197_end_0, end_mask = obj_197_end_mask_0, x = read_state_2)[name = string("obj_197_cast_fp16")];
+            tensor<int32, [4]> obj_199_begin_0 = const()[name = string("obj_199_begin_0"), val = tensor<int32, [4]>([10, 0, 0, 0])];
+            tensor<int32, [4]> obj_199_end_0 = const()[name = string("obj_199_end_0"), val = tensor<int32, [4]>([11, 768, 1, 1536])];
+            tensor<bool, [4]> obj_199_end_mask_0 = const()[name = string("obj_199_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<fp16, [1, 768, 1, 1536]> obj_199_cast_fp16 = slice_by_index(begin = obj_199_begin_0, end = obj_199_end_0, end_mask = obj_199_end_mask_0, x = read_state_3)[name = string("obj_199_cast_fp16")];
+            int32 var_2940 = const()[name = string("op_2940"), val = int32(3)];
+            tensor<int32, [1]> out_61_axes_0 = const()[name = string("out_61_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_2965_to_fp16 = const()[name = string("op_2965_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> out_61_cast_fp16 = layer_norm(axes = out_61_axes_0, epsilon = var_2965_to_fp16, x = inputs_61_cast_fp16)[name = string("out_61_cast_fp16")];
+            tensor<fp16, [768]> obj_187_gamma_0_to_fp16 = const()[name = string("obj_187_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(133869696)))];
+            tensor<fp16, [768]> obj_187_beta_0_to_fp16 = const()[name = string("obj_187_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(133871296)))];
+            fp16 obj_187_epsilon_0_to_fp16 = const()[name = string("obj_187_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> obj_187_cast_fp16 = batch_norm(beta = obj_187_beta_0_to_fp16, epsilon = obj_187_epsilon_0_to_fp16, gamma = obj_187_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_61_cast_fp16)[name = string("obj_187_cast_fp16")];
+            string var_2987_pad_type_0 = const()[name = string("op_2987_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2987_strides_0 = const()[name = string("op_2987_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2987_pad_0 = const()[name = string("op_2987_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2987_dilations_0 = const()[name = string("op_2987_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2987_groups_0 = const()[name = string("op_2987_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_10_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(133872896))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(134167872))))[name = string("layers_10_self_attn_q_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_10_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_10_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(134168000)))];
+            tensor<fp16, [1, 768, 1, 1]> var_2987_cast_fp16 = conv(bias = layers_10_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_2987_dilations_0, groups = var_2987_groups_0, pad = var_2987_pad_0, pad_type = var_2987_pad_type_0, strides = var_2987_strides_0, weight = layers_10_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_187_cast_fp16)[name = string("op_2987_cast_fp16")];
+            string var_2993_pad_type_0 = const()[name = string("op_2993_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2993_strides_0 = const()[name = string("op_2993_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2993_pad_0 = const()[name = string("op_2993_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2993_dilations_0 = const()[name = string("op_2993_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2993_groups_0 = const()[name = string("op_2993_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_10_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(134175168))), nonzero_data = tensor<fp16, [2738]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(134169600))))[name = string("layers_10_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1]> var_2993_cast_fp16 = conv(dilations = var_2993_dilations_0, groups = var_2993_groups_0, pad = var_2993_pad_0, pad_type = var_2993_pad_type_0, strides = var_2993_strides_0, weight = layers_10_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_187_cast_fp16)[name = string("op_2993_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> query_41_cast_fp16 = add(x = var_2987_cast_fp16, y = var_2993_cast_fp16)[name = string("query_41_cast_fp16")];
+            string var_3002_pad_type_0 = const()[name = string("op_3002_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_3002_strides_0 = const()[name = string("op_3002_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3002_pad_0 = const()[name = string("op_3002_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3002_dilations_0 = const()[name = string("op_3002_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_3002_groups_0 = const()[name = string("op_3002_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_10_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(134248960))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(134543936))))[name = string("layers_10_self_attn_k_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1, 768, 1, 1]> var_3002_cast_fp16 = conv(dilations = var_3002_dilations_0, groups = var_3002_groups_0, pad = var_3002_pad_0, pad_type = var_3002_pad_type_0, strides = var_3002_strides_0, weight = layers_10_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_187_cast_fp16)[name = string("op_3002_cast_fp16")];
+            string var_3008_pad_type_0 = const()[name = string("op_3008_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_3008_strides_0 = const()[name = string("op_3008_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3008_pad_0 = const()[name = string("op_3008_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3008_dilations_0 = const()[name = string("op_3008_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_3008_groups_0 = const()[name = string("op_3008_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_10_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(134549888))), nonzero_data = tensor<fp16, [2857]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(134544064))))[name = string("layers_10_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1]> var_3008_cast_fp16 = conv(dilations = var_3008_dilations_0, groups = var_3008_groups_0, pad = var_3008_pad_0, pad_type = var_3008_pad_type_0, strides = var_3008_strides_0, weight = layers_10_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_187_cast_fp16)[name = string("op_3008_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> current_key_21_cast_fp16 = add(x = var_3002_cast_fp16, y = var_3008_cast_fp16)[name = string("current_key_21_cast_fp16")];
+            string var_3018_pad_type_0 = const()[name = string("op_3018_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_3018_strides_0 = const()[name = string("op_3018_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3018_pad_0 = const()[name = string("op_3018_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3018_dilations_0 = const()[name = string("op_3018_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_3018_groups_0 = const()[name = string("op_3018_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_10_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(134623680))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(134918656))))[name = string("layers_10_self_attn_v_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_10_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_10_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(134918784)))];
+            tensor<fp16, [1, 768, 1, 1]> var_3018_cast_fp16 = conv(bias = layers_10_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_3018_dilations_0, groups = var_3018_groups_0, pad = var_3018_pad_0, pad_type = var_3018_pad_type_0, strides = var_3018_strides_0, weight = layers_10_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_187_cast_fp16)[name = string("op_3018_cast_fp16")];
+            string var_3024_pad_type_0 = const()[name = string("op_3024_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_3024_strides_0 = const()[name = string("op_3024_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3024_pad_0 = const()[name = string("op_3024_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3024_dilations_0 = const()[name = string("op_3024_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_3024_groups_0 = const()[name = string("op_3024_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_10_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(134928448))), nonzero_data = tensor<fp16, [3977]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(134920384))))[name = string("layers_10_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1]> var_3024_cast_fp16 = conv(dilations = var_3024_dilations_0, groups = var_3024_groups_0, pad = var_3024_pad_0, pad_type = var_3024_pad_type_0, strides = var_3024_strides_0, weight = layers_10_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_187_cast_fp16)[name = string("op_3024_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> current_value_21_cast_fp16 = add(x = var_3018_cast_fp16, y = var_3024_cast_fp16)[name = string("current_value_21_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 448]> var_3030_cast_fp16 = mul(x = current_key_21_cast_fp16, y = var_202_cast_fp16)[name = string("op_3030_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 448]> key_21_cast_fp16 = add(x = var_71_cast_fp16_10, y = var_3030_cast_fp16)[name = string("key_21_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 448]> var_3032_cast_fp16 = mul(x = current_value_21_cast_fp16, y = var_202_cast_fp16)[name = string("op_3032_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 448]> value_21_cast_fp16 = add(x = var_86_cast_fp16_10, y = var_3032_cast_fp16)[name = string("value_21_cast_fp16")];
+            tensor<int32, [4]> var_3035 = const()[name = string("op_3035"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1]> mh_q_41_cast_fp16 = reshape(shape = var_3035, x = query_41_cast_fp16)[name = string("mh_q_41_cast_fp16")];
+            fp16 var_3037_to_fp16 = const()[name = string("op_3037_to_fp16"), val = fp16(0x1p-3)];
+            tensor<fp16, [1, 12, 64, 1]> var_3038_cast_fp16 = mul(x = mh_q_41_cast_fp16, y = var_3037_to_fp16)[name = string("op_3038_cast_fp16")];
+            tensor<int32, [4]> var_3039 = const()[name = string("op_3039"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 448]> var_3040_cast_fp16 = reshape(shape = var_3039, x = key_21_cast_fp16)[name = string("op_3040_cast_fp16")];
+            bool mh_w_81_transpose_x_0 = const()[name = string("mh_w_81_transpose_x_0"), val = bool(true)];
+            bool mh_w_81_transpose_y_0 = const()[name = string("mh_w_81_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 12, 1, 448]> mh_w_81_cast_fp16 = matmul(transpose_x = mh_w_81_transpose_x_0, transpose_y = mh_w_81_transpose_y_0, x = var_3038_cast_fp16, y = var_3040_cast_fp16)[name = string("mh_w_81_cast_fp16")];
+            tensor<fp16, [1, 12, 1, 448]> mh_w_83_cast_fp16 = add(x = mh_w_81_cast_fp16, y = var_219_cast_fp16)[name = string("mh_w_83_cast_fp16")];
+            tensor<fp16, [1, 12, 1, 448]> var_3048_cast_fp16 = softmax(axis = var_2940, x = mh_w_83_cast_fp16)[name = string("op_3048_cast_fp16")];
+            tensor<int32, [4]> var_3049 = const()[name = string("op_3049"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 448]> var_3050_cast_fp16 = reshape(shape = var_3049, x = value_21_cast_fp16)[name = string("op_3050_cast_fp16")];
+            bool attn_41_transpose_x_0 = const()[name = string("attn_41_transpose_x_0"), val = bool(false)];
+            bool attn_41_transpose_y_0 = const()[name = string("attn_41_transpose_y_0"), val = bool(true)];
+            tensor<fp16, [1, 12, 64, 1]> attn_41_cast_fp16 = matmul(transpose_x = attn_41_transpose_x_0, transpose_y = attn_41_transpose_y_0, x = var_3050_cast_fp16, y = var_3048_cast_fp16)[name = string("attn_41_cast_fp16")];
+            tensor<int32, [4]> var_3053 = const()[name = string("op_3053"), val = tensor<int32, [4]>([1, 768, 1, -1])];
+            tensor<fp16, [1, 768, 1, 1]> input_101_cast_fp16 = reshape(shape = var_3053, x = attn_41_cast_fp16)[name = string("input_101_cast_fp16")];
+            string var_3063_pad_type_0 = const()[name = string("op_3063_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_3063_strides_0 = const()[name = string("op_3063_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3063_pad_0 = const()[name = string("op_3063_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3063_dilations_0 = const()[name = string("op_3063_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_3063_groups_0 = const()[name = string("op_3063_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_10_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(135002240))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(135297216))))[name = string("layers_10_self_attn_o_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_10_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_10_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(135297344)))];
+            tensor<fp16, [1, 768, 1, 1]> var_3063_cast_fp16 = conv(bias = layers_10_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_3063_dilations_0, groups = var_3063_groups_0, pad = var_3063_pad_0, pad_type = var_3063_pad_type_0, strides = var_3063_strides_0, weight = layers_10_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_101_cast_fp16)[name = string("op_3063_cast_fp16")];
+            string var_3069_pad_type_0 = const()[name = string("op_3069_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_3069_strides_0 = const()[name = string("op_3069_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3069_pad_0 = const()[name = string("op_3069_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3069_dilations_0 = const()[name = string("op_3069_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_3069_groups_0 = const()[name = string("op_3069_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_10_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(135306944))), nonzero_data = tensor<fp16, [3944]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(135298944))))[name = string("layers_10_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1]> var_3069_cast_fp16 = conv(dilations = var_3069_dilations_0, groups = var_3069_groups_0, pad = var_3069_pad_0, pad_type = var_3069_pad_type_0, strides = var_3069_strides_0, weight = layers_10_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_101_cast_fp16)[name = string("op_3069_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> obj_193_cast_fp16 = add(x = var_3063_cast_fp16, y = var_3069_cast_fp16)[name = string("obj_193_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> inputs_63_cast_fp16 = add(x = inputs_61_cast_fp16, y = obj_193_cast_fp16)[name = string("inputs_63_cast_fp16")];
+            tensor<int32, [1]> out_63_axes_0 = const()[name = string("out_63_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_3084_to_fp16 = const()[name = string("op_3084_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> out_63_cast_fp16 = layer_norm(axes = out_63_axes_0, epsilon = var_3084_to_fp16, x = inputs_63_cast_fp16)[name = string("out_63_cast_fp16")];
+            tensor<fp16, [768]> obj_195_gamma_0_to_fp16 = const()[name = string("obj_195_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(135380736)))];
+            tensor<fp16, [768]> obj_195_beta_0_to_fp16 = const()[name = string("obj_195_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(135382336)))];
+            fp16 obj_195_epsilon_0_to_fp16 = const()[name = string("obj_195_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> obj_195_cast_fp16 = batch_norm(beta = obj_195_beta_0_to_fp16, epsilon = obj_195_epsilon_0_to_fp16, gamma = obj_195_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_63_cast_fp16)[name = string("obj_195_cast_fp16")];
+            string var_3104_pad_type_0 = const()[name = string("op_3104_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_3104_strides_0 = const()[name = string("op_3104_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3104_pad_0 = const()[name = string("op_3104_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3104_dilations_0 = const()[name = string("op_3104_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_3104_groups_0 = const()[name = string("op_3104_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_10_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(135383936))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(135678912))))[name = string("layers_10_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_10_encoder_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_10_encoder_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(135679040)))];
+            tensor<fp16, [1, 768, 1, 1]> var_3104_cast_fp16 = conv(bias = layers_10_encoder_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_3104_dilations_0, groups = var_3104_groups_0, pad = var_3104_pad_0, pad_type = var_3104_pad_type_0, strides = var_3104_strides_0, weight = layers_10_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_195_cast_fp16)[name = string("op_3104_cast_fp16")];
+            string var_3110_pad_type_0 = const()[name = string("op_3110_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_3110_strides_0 = const()[name = string("op_3110_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3110_pad_0 = const()[name = string("op_3110_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3110_dilations_0 = const()[name = string("op_3110_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_3110_groups_0 = const()[name = string("op_3110_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_10_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(135686464))), nonzero_data = tensor<fp16, [2873]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(135680640))))[name = string("layers_10_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1]> var_3110_cast_fp16 = conv(dilations = var_3110_dilations_0, groups = var_3110_groups_0, pad = var_3110_pad_0, pad_type = var_3110_pad_type_0, strides = var_3110_strides_0, weight = layers_10_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_195_cast_fp16)[name = string("op_3110_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> query_43_cast_fp16 = add(x = var_3104_cast_fp16, y = var_3110_cast_fp16)[name = string("query_43_cast_fp16")];
+            tensor<int32, [4]> var_3113 = const()[name = string("op_3113"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1]> mh_q_43_cast_fp16 = reshape(shape = var_3113, x = query_43_cast_fp16)[name = string("mh_q_43_cast_fp16")];
+            fp16 var_3115_to_fp16 = const()[name = string("op_3115_to_fp16"), val = fp16(0x1p-3)];
+            tensor<fp16, [1, 12, 64, 1]> var_3116_cast_fp16 = mul(x = mh_q_43_cast_fp16, y = var_3115_to_fp16)[name = string("op_3116_cast_fp16")];
+            tensor<int32, [4]> var_3117 = const()[name = string("op_3117"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1536]> var_3118_cast_fp16 = reshape(shape = var_3117, x = obj_197_cast_fp16)[name = string("op_3118_cast_fp16")];
+            bool mh_w_85_transpose_x_0 = const()[name = string("mh_w_85_transpose_x_0"), val = bool(true)];
+            bool mh_w_85_transpose_y_0 = const()[name = string("mh_w_85_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 12, 1, 1536]> mh_w_85_cast_fp16 = matmul(transpose_x = mh_w_85_transpose_x_0, transpose_y = mh_w_85_transpose_y_0, x = var_3116_cast_fp16, y = var_3118_cast_fp16)[name = string("mh_w_85_cast_fp16")];
+            tensor<fp16, [1, 12, 1, 1536]> mh_w_87_cast_fp16 = add(x = mh_w_85_cast_fp16, y = var_297_cast_fp16)[name = string("mh_w_87_cast_fp16")];
+            tensor<fp16, [1, 12, 1, 1536]> obj_203_cast_fp16 = softmax(axis = var_2940, x = mh_w_87_cast_fp16)[name = string("obj_203_cast_fp16")];
+            tensor<int32, [4]> var_3127 = const()[name = string("op_3127"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1536]> var_3128_cast_fp16 = reshape(shape = var_3127, x = obj_199_cast_fp16)[name = string("op_3128_cast_fp16")];
+            bool attn_43_transpose_x_0 = const()[name = string("attn_43_transpose_x_0"), val = bool(false)];
+            bool attn_43_transpose_y_0 = const()[name = string("attn_43_transpose_y_0"), val = bool(true)];
+            tensor<fp16, [1, 12, 64, 1]> attn_43_cast_fp16 = matmul(transpose_x = attn_43_transpose_x_0, transpose_y = attn_43_transpose_y_0, x = var_3128_cast_fp16, y = obj_203_cast_fp16)[name = string("attn_43_cast_fp16")];
+            tensor<int32, [4]> var_3131 = const()[name = string("op_3131"), val = tensor<int32, [4]>([1, 768, 1, -1])];
+            tensor<fp16, [1, 768, 1, 1]> input_103_cast_fp16 = reshape(shape = var_3131, x = attn_43_cast_fp16)[name = string("input_103_cast_fp16")];
+            string var_3141_pad_type_0 = const()[name = string("op_3141_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_3141_strides_0 = const()[name = string("op_3141_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3141_pad_0 = const()[name = string("op_3141_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3141_dilations_0 = const()[name = string("op_3141_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_3141_groups_0 = const()[name = string("op_3141_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_10_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(135760256))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(136055232))))[name = string("layers_10_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_10_encoder_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_10_encoder_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(136055360)))];
+            tensor<fp16, [1, 768, 1, 1]> var_3141_cast_fp16 = conv(bias = layers_10_encoder_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_3141_dilations_0, groups = var_3141_groups_0, pad = var_3141_pad_0, pad_type = var_3141_pad_type_0, strides = var_3141_strides_0, weight = layers_10_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_103_cast_fp16)[name = string("op_3141_cast_fp16")];
+            string var_3147_pad_type_0 = const()[name = string("op_3147_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_3147_strides_0 = const()[name = string("op_3147_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3147_pad_0 = const()[name = string("op_3147_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3147_dilations_0 = const()[name = string("op_3147_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_3147_groups_0 = const()[name = string("op_3147_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_10_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(136066368))), nonzero_data = tensor<fp16, [4654]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(136056960))))[name = string("layers_10_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1]> var_3147_cast_fp16 = conv(dilations = var_3147_dilations_0, groups = var_3147_groups_0, pad = var_3147_pad_0, pad_type = var_3147_pad_type_0, strides = var_3147_strides_0, weight = layers_10_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_103_cast_fp16)[name = string("op_3147_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> obj_201_cast_fp16 = add(x = var_3141_cast_fp16, y = var_3147_cast_fp16)[name = string("obj_201_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> inputs_65_cast_fp16 = add(x = inputs_63_cast_fp16, y = obj_201_cast_fp16)[name = string("inputs_65_cast_fp16")];
+            tensor<int32, [1]> out_65_axes_0 = const()[name = string("out_65_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_3161_to_fp16 = const()[name = string("op_3161_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> out_65_cast_fp16 = layer_norm(axes = out_65_axes_0, epsilon = var_3161_to_fp16, x = inputs_65_cast_fp16)[name = string("out_65_cast_fp16")];
+            tensor<fp16, [768]> input_105_gamma_0_to_fp16 = const()[name = string("input_105_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(136140160)))];
+            tensor<fp16, [768]> input_105_beta_0_to_fp16 = const()[name = string("input_105_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(136141760)))];
+            fp16 input_105_epsilon_0_to_fp16 = const()[name = string("input_105_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> input_105_cast_fp16 = batch_norm(beta = input_105_beta_0_to_fp16, epsilon = input_105_epsilon_0_to_fp16, gamma = input_105_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_65_cast_fp16)[name = string("input_105_cast_fp16")];
+            string var_3179_pad_type_0 = const()[name = string("op_3179_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_3179_strides_0 = const()[name = string("op_3179_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3179_pad_0 = const()[name = string("op_3179_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3179_dilations_0 = const()[name = string("op_3179_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_3179_groups_0 = const()[name = string("op_3179_groups_0"), val = int32(1)];
+            tensor<fp16, [3072, 768, 1, 1]> layers_10_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [3072, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(136143360))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(137323072))))[name = string("layers_10_fc1_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [3072]> layers_10_fc1_inlier_module_bias_to_fp16 = const()[name = string("layers_10_fc1_inlier_module_bias_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(137323200)))];
+            tensor<fp16, [1, 3072, 1, 1]> var_3179_cast_fp16 = conv(bias = layers_10_fc1_inlier_module_bias_to_fp16, dilations = var_3179_dilations_0, groups = var_3179_groups_0, pad = var_3179_pad_0, pad_type = var_3179_pad_type_0, strides = var_3179_strides_0, weight = layers_10_fc1_inlier_module_weight_to_fp16_palettized, x = input_105_cast_fp16)[name = string("op_3179_cast_fp16")];
+            string var_3185_pad_type_0 = const()[name = string("op_3185_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_3185_strides_0 = const()[name = string("op_3185_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3185_pad_0 = const()[name = string("op_3185_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3185_dilations_0 = const()[name = string("op_3185_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_3185_groups_0 = const()[name = string("op_3185_groups_0"), val = int32(1)];
+            tensor<fp16, [3072, 768, 1, 1]> layers_10_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [3072, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(137350912))), nonzero_data = tensor<fp16, [10698]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(137329408))))[name = string("layers_10_fc1_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 3072, 1, 1]> var_3185_cast_fp16 = conv(dilations = var_3185_dilations_0, groups = var_3185_groups_0, pad = var_3185_pad_0, pad_type = var_3185_pad_type_0, strides = var_3185_strides_0, weight = layers_10_fc1_outlier_module_weight_to_fp16_sparsified, x = input_105_cast_fp16)[name = string("op_3185_cast_fp16")];
+            tensor<fp16, [1, 3072, 1, 1]> input_107_cast_fp16 = add(x = var_3179_cast_fp16, y = var_3185_cast_fp16)[name = string("input_107_cast_fp16")];
+            string input_109_mode_0 = const()[name = string("input_109_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 3072, 1, 1]> input_109_cast_fp16 = gelu(mode = input_109_mode_0, x = input_107_cast_fp16)[name = string("input_109_cast_fp16")];
+            string var_3196_pad_type_0 = const()[name = string("op_3196_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_3196_strides_0 = const()[name = string("op_3196_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3196_pad_0 = const()[name = string("op_3196_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3196_dilations_0 = const()[name = string("op_3196_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_3196_groups_0 = const()[name = string("op_3196_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 3072, 1, 1]> layers_10_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(137645888))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(138825600))))[name = string("layers_10_fc2_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_10_fc2_inlier_module_bias_to_fp16 = const()[name = string("layers_10_fc2_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(138825728)))];
+            tensor<fp16, [1, 768, 1, 1]> var_3196_cast_fp16 = conv(bias = layers_10_fc2_inlier_module_bias_to_fp16, dilations = var_3196_dilations_0, groups = var_3196_groups_0, pad = var_3196_pad_0, pad_type = var_3196_pad_type_0, strides = var_3196_strides_0, weight = layers_10_fc2_inlier_module_weight_to_fp16_palettized, x = input_109_cast_fp16)[name = string("op_3196_cast_fp16")];
+            string var_3202_pad_type_0 = const()[name = string("op_3202_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_3202_strides_0 = const()[name = string("op_3202_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3202_pad_0 = const()[name = string("op_3202_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3202_dilations_0 = const()[name = string("op_3202_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_3202_groups_0 = const()[name = string("op_3202_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 3072, 1, 1]> layers_10_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(138857344))), nonzero_data = tensor<fp16, [14957]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(138827328))))[name = string("layers_10_fc2_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1]> var_3202_cast_fp16 = conv(dilations = var_3202_dilations_0, groups = var_3202_groups_0, pad = var_3202_pad_0, pad_type = var_3202_pad_type_0, strides = var_3202_strides_0, weight = layers_10_fc2_outlier_module_weight_to_fp16_sparsified, x = input_109_cast_fp16)[name = string("op_3202_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> hidden_states_23_cast_fp16 = add(x = var_3196_cast_fp16, y = var_3202_cast_fp16)[name = string("hidden_states_23_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> inputs_67_cast_fp16 = add(x = inputs_65_cast_fp16, y = hidden_states_23_cast_fp16)[name = string("inputs_67_cast_fp16")];
+            tensor<int32, [4]> obj_215_begin_0 = const()[name = string("obj_215_begin_0"), val = tensor<int32, [4]>([11, 0, 0, 0])];
+            tensor<int32, [4]> obj_215_end_0 = const()[name = string("obj_215_end_0"), val = tensor<int32, [4]>([12, 768, 1, 1536])];
+            tensor<bool, [4]> obj_215_end_mask_0 = const()[name = string("obj_215_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<fp16, [1, 768, 1, 1536]> obj_215_cast_fp16 = slice_by_index(begin = obj_215_begin_0, end = obj_215_end_0, end_mask = obj_215_end_mask_0, x = read_state_2)[name = string("obj_215_cast_fp16")];
+            tensor<int32, [4]> obj_217_begin_0 = const()[name = string("obj_217_begin_0"), val = tensor<int32, [4]>([11, 0, 0, 0])];
+            tensor<int32, [4]> obj_217_end_0 = const()[name = string("obj_217_end_0"), val = tensor<int32, [4]>([12, 768, 1, 1536])];
+            tensor<bool, [4]> obj_217_end_mask_0 = const()[name = string("obj_217_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<fp16, [1, 768, 1, 1536]> obj_217_cast_fp16 = slice_by_index(begin = obj_217_begin_0, end = obj_217_end_0, end_mask = obj_217_end_mask_0, x = read_state_3)[name = string("obj_217_cast_fp16")];
+            int32 var_3225 = const()[name = string("op_3225"), val = int32(3)];
+            tensor<int32, [1]> out_67_axes_0 = const()[name = string("out_67_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_3250_to_fp16 = const()[name = string("op_3250_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> out_67_cast_fp16 = layer_norm(axes = out_67_axes_0, epsilon = var_3250_to_fp16, x = inputs_67_cast_fp16)[name = string("out_67_cast_fp16")];
+            tensor<fp16, [768]> obj_205_gamma_0_to_fp16 = const()[name = string("obj_205_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(139152320)))];
+            tensor<fp16, [768]> obj_205_beta_0_to_fp16 = const()[name = string("obj_205_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(139153920)))];
+            fp16 obj_205_epsilon_0_to_fp16 = const()[name = string("obj_205_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> obj_205_cast_fp16 = batch_norm(beta = obj_205_beta_0_to_fp16, epsilon = obj_205_epsilon_0_to_fp16, gamma = obj_205_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_67_cast_fp16)[name = string("obj_205_cast_fp16")];
+            string var_3272_pad_type_0 = const()[name = string("op_3272_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_3272_strides_0 = const()[name = string("op_3272_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3272_pad_0 = const()[name = string("op_3272_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3272_dilations_0 = const()[name = string("op_3272_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_3272_groups_0 = const()[name = string("op_3272_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_11_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(139155520))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(139450496))))[name = string("layers_11_self_attn_q_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_11_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_11_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(139450624)))];
+            tensor<fp16, [1, 768, 1, 1]> var_3272_cast_fp16 = conv(bias = layers_11_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_3272_dilations_0, groups = var_3272_groups_0, pad = var_3272_pad_0, pad_type = var_3272_pad_type_0, strides = var_3272_strides_0, weight = layers_11_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_205_cast_fp16)[name = string("op_3272_cast_fp16")];
+            string var_3278_pad_type_0 = const()[name = string("op_3278_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_3278_strides_0 = const()[name = string("op_3278_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3278_pad_0 = const()[name = string("op_3278_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3278_dilations_0 = const()[name = string("op_3278_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_3278_groups_0 = const()[name = string("op_3278_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_11_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(139457408))), nonzero_data = tensor<fp16, [2530]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(139452224))))[name = string("layers_11_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1]> var_3278_cast_fp16 = conv(dilations = var_3278_dilations_0, groups = var_3278_groups_0, pad = var_3278_pad_0, pad_type = var_3278_pad_type_0, strides = var_3278_strides_0, weight = layers_11_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_205_cast_fp16)[name = string("op_3278_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> query_45_cast_fp16 = add(x = var_3272_cast_fp16, y = var_3278_cast_fp16)[name = string("query_45_cast_fp16")];
+            string var_3287_pad_type_0 = const()[name = string("op_3287_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_3287_strides_0 = const()[name = string("op_3287_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3287_pad_0 = const()[name = string("op_3287_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3287_dilations_0 = const()[name = string("op_3287_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_3287_groups_0 = const()[name = string("op_3287_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_11_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(139531200))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(139826176))))[name = string("layers_11_self_attn_k_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1, 768, 1, 1]> var_3287_cast_fp16 = conv(dilations = var_3287_dilations_0, groups = var_3287_groups_0, pad = var_3287_pad_0, pad_type = var_3287_pad_type_0, strides = var_3287_strides_0, weight = layers_11_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_205_cast_fp16)[name = string("op_3287_cast_fp16")];
+            string var_3293_pad_type_0 = const()[name = string("op_3293_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_3293_strides_0 = const()[name = string("op_3293_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3293_pad_0 = const()[name = string("op_3293_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3293_dilations_0 = const()[name = string("op_3293_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_3293_groups_0 = const()[name = string("op_3293_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_11_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(139831488))), nonzero_data = tensor<fp16, [2535]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(139826304))))[name = string("layers_11_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1]> var_3293_cast_fp16 = conv(dilations = var_3293_dilations_0, groups = var_3293_groups_0, pad = var_3293_pad_0, pad_type = var_3293_pad_type_0, strides = var_3293_strides_0, weight = layers_11_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_205_cast_fp16)[name = string("op_3293_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> current_key_cast_fp16 = add(x = var_3287_cast_fp16, y = var_3293_cast_fp16)[name = string("current_key_cast_fp16")];
+            string var_3303_pad_type_0 = const()[name = string("op_3303_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_3303_strides_0 = const()[name = string("op_3303_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3303_pad_0 = const()[name = string("op_3303_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3303_dilations_0 = const()[name = string("op_3303_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_3303_groups_0 = const()[name = string("op_3303_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_11_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(139905280))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(140200256))))[name = string("layers_11_self_attn_v_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_11_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_11_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(140200384)))];
+            tensor<fp16, [1, 768, 1, 1]> var_3303_cast_fp16 = conv(bias = layers_11_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_3303_dilations_0, groups = var_3303_groups_0, pad = var_3303_pad_0, pad_type = var_3303_pad_type_0, strides = var_3303_strides_0, weight = layers_11_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_205_cast_fp16)[name = string("op_3303_cast_fp16")];
+            string var_3309_pad_type_0 = const()[name = string("op_3309_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_3309_strides_0 = const()[name = string("op_3309_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3309_pad_0 = const()[name = string("op_3309_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3309_dilations_0 = const()[name = string("op_3309_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_3309_groups_0 = const()[name = string("op_3309_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_11_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(140207744))), nonzero_data = tensor<fp16, [2823]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(140201984))))[name = string("layers_11_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1]> var_3309_cast_fp16 = conv(dilations = var_3309_dilations_0, groups = var_3309_groups_0, pad = var_3309_pad_0, pad_type = var_3309_pad_type_0, strides = var_3309_strides_0, weight = layers_11_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_205_cast_fp16)[name = string("op_3309_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> current_value_cast_fp16 = add(x = var_3303_cast_fp16, y = var_3309_cast_fp16)[name = string("current_value_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 448]> var_3315_cast_fp16 = mul(x = current_key_cast_fp16, y = var_202_cast_fp16)[name = string("op_3315_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 448]> key_cast_fp16 = add(x = var_71_cast_fp16_11, y = var_3315_cast_fp16)[name = string("key_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 448]> var_3317_cast_fp16 = mul(x = current_value_cast_fp16, y = var_202_cast_fp16)[name = string("op_3317_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 448]> value_cast_fp16 = add(x = var_86_cast_fp16_11, y = var_3317_cast_fp16)[name = string("value_cast_fp16")];
+            tensor<int32, [4]> var_3320 = const()[name = string("op_3320"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1]> mh_q_45_cast_fp16 = reshape(shape = var_3320, x = query_45_cast_fp16)[name = string("mh_q_45_cast_fp16")];
+            fp16 var_3322_to_fp16 = const()[name = string("op_3322_to_fp16"), val = fp16(0x1p-3)];
+            tensor<fp16, [1, 12, 64, 1]> var_3323_cast_fp16 = mul(x = mh_q_45_cast_fp16, y = var_3322_to_fp16)[name = string("op_3323_cast_fp16")];
+            tensor<int32, [4]> var_3324 = const()[name = string("op_3324"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 448]> var_3325_cast_fp16 = reshape(shape = var_3324, x = key_cast_fp16)[name = string("op_3325_cast_fp16")];
+            bool mh_w_89_transpose_x_0 = const()[name = string("mh_w_89_transpose_x_0"), val = bool(true)];
+            bool mh_w_89_transpose_y_0 = const()[name = string("mh_w_89_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 12, 1, 448]> mh_w_89_cast_fp16 = matmul(transpose_x = mh_w_89_transpose_x_0, transpose_y = mh_w_89_transpose_y_0, x = var_3323_cast_fp16, y = var_3325_cast_fp16)[name = string("mh_w_89_cast_fp16")];
+            tensor<fp16, [1, 12, 1, 448]> mh_w_91_cast_fp16 = add(x = mh_w_89_cast_fp16, y = var_219_cast_fp16)[name = string("mh_w_91_cast_fp16")];
+            tensor<fp16, [1, 12, 1, 448]> var_3333_cast_fp16 = softmax(axis = var_3225, x = mh_w_91_cast_fp16)[name = string("op_3333_cast_fp16")];
+            tensor<int32, [4]> var_3334 = const()[name = string("op_3334"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 448]> var_3335_cast_fp16 = reshape(shape = var_3334, x = value_cast_fp16)[name = string("op_3335_cast_fp16")];
+            bool attn_45_transpose_x_0 = const()[name = string("attn_45_transpose_x_0"), val = bool(false)];
+            bool attn_45_transpose_y_0 = const()[name = string("attn_45_transpose_y_0"), val = bool(true)];
+            tensor<fp16, [1, 12, 64, 1]> attn_45_cast_fp16 = matmul(transpose_x = attn_45_transpose_x_0, transpose_y = attn_45_transpose_y_0, x = var_3335_cast_fp16, y = var_3333_cast_fp16)[name = string("attn_45_cast_fp16")];
+            tensor<int32, [4]> var_3338 = const()[name = string("op_3338"), val = tensor<int32, [4]>([1, 768, 1, -1])];
+            tensor<fp16, [1, 768, 1, 1]> input_111_cast_fp16 = reshape(shape = var_3338, x = attn_45_cast_fp16)[name = string("input_111_cast_fp16")];
+            string var_3348_pad_type_0 = const()[name = string("op_3348_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_3348_strides_0 = const()[name = string("op_3348_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3348_pad_0 = const()[name = string("op_3348_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3348_dilations_0 = const()[name = string("op_3348_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_3348_groups_0 = const()[name = string("op_3348_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_11_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(140281536))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(140576512))))[name = string("layers_11_self_attn_o_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_11_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_11_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(140576640)))];
+            tensor<fp16, [1, 768, 1, 1]> var_3348_cast_fp16 = conv(bias = layers_11_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_3348_dilations_0, groups = var_3348_groups_0, pad = var_3348_pad_0, pad_type = var_3348_pad_type_0, strides = var_3348_strides_0, weight = layers_11_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_111_cast_fp16)[name = string("op_3348_cast_fp16")];
+            string var_3354_pad_type_0 = const()[name = string("op_3354_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_3354_strides_0 = const()[name = string("op_3354_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3354_pad_0 = const()[name = string("op_3354_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3354_dilations_0 = const()[name = string("op_3354_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_3354_groups_0 = const()[name = string("op_3354_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_11_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(140587392))), nonzero_data = tensor<fp16, [4528]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(140578240))))[name = string("layers_11_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1]> var_3354_cast_fp16 = conv(dilations = var_3354_dilations_0, groups = var_3354_groups_0, pad = var_3354_pad_0, pad_type = var_3354_pad_type_0, strides = var_3354_strides_0, weight = layers_11_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_111_cast_fp16)[name = string("op_3354_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> obj_211_cast_fp16 = add(x = var_3348_cast_fp16, y = var_3354_cast_fp16)[name = string("obj_211_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> inputs_69_cast_fp16 = add(x = inputs_67_cast_fp16, y = obj_211_cast_fp16)[name = string("inputs_69_cast_fp16")];
+            tensor<int32, [1]> out_69_axes_0 = const()[name = string("out_69_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_3369_to_fp16 = const()[name = string("op_3369_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> out_69_cast_fp16 = layer_norm(axes = out_69_axes_0, epsilon = var_3369_to_fp16, x = inputs_69_cast_fp16)[name = string("out_69_cast_fp16")];
+            tensor<fp16, [768]> obj_213_gamma_0_to_fp16 = const()[name = string("obj_213_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(140661184)))];
+            tensor<fp16, [768]> obj_213_beta_0_to_fp16 = const()[name = string("obj_213_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(140662784)))];
+            fp16 obj_213_epsilon_0_to_fp16 = const()[name = string("obj_213_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> obj_213_cast_fp16 = batch_norm(beta = obj_213_beta_0_to_fp16, epsilon = obj_213_epsilon_0_to_fp16, gamma = obj_213_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_69_cast_fp16)[name = string("obj_213_cast_fp16")];
+            string var_3389_pad_type_0 = const()[name = string("op_3389_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_3389_strides_0 = const()[name = string("op_3389_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3389_pad_0 = const()[name = string("op_3389_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3389_dilations_0 = const()[name = string("op_3389_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_3389_groups_0 = const()[name = string("op_3389_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_11_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(140664384))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(140959360))))[name = string("layers_11_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_11_encoder_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_11_encoder_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(140959488)))];
+            tensor<fp16, [1, 768, 1, 1]> var_3389_cast_fp16 = conv(bias = layers_11_encoder_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_3389_dilations_0, groups = var_3389_groups_0, pad = var_3389_pad_0, pad_type = var_3389_pad_type_0, strides = var_3389_strides_0, weight = layers_11_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_213_cast_fp16)[name = string("op_3389_cast_fp16")];
+            string var_3395_pad_type_0 = const()[name = string("op_3395_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_3395_strides_0 = const()[name = string("op_3395_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3395_pad_0 = const()[name = string("op_3395_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3395_dilations_0 = const()[name = string("op_3395_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_3395_groups_0 = const()[name = string("op_3395_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_11_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(140968064))), nonzero_data = tensor<fp16, [3432]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(140961088))))[name = string("layers_11_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1]> var_3395_cast_fp16 = conv(dilations = var_3395_dilations_0, groups = var_3395_groups_0, pad = var_3395_pad_0, pad_type = var_3395_pad_type_0, strides = var_3395_strides_0, weight = layers_11_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_213_cast_fp16)[name = string("op_3395_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> query_cast_fp16 = add(x = var_3389_cast_fp16, y = var_3395_cast_fp16)[name = string("query_cast_fp16")];
+            tensor<int32, [4]> var_3398 = const()[name = string("op_3398"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1]> mh_q_cast_fp16 = reshape(shape = var_3398, x = query_cast_fp16)[name = string("mh_q_cast_fp16")];
+            fp16 var_3400_to_fp16 = const()[name = string("op_3400_to_fp16"), val = fp16(0x1p-3)];
+            tensor<fp16, [1, 12, 64, 1]> var_3401_cast_fp16 = mul(x = mh_q_cast_fp16, y = var_3400_to_fp16)[name = string("op_3401_cast_fp16")];
+            tensor<int32, [4]> var_3402 = const()[name = string("op_3402"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1536]> var_3403_cast_fp16 = reshape(shape = var_3402, x = obj_215_cast_fp16)[name = string("op_3403_cast_fp16")];
+            bool mh_w_93_transpose_x_0 = const()[name = string("mh_w_93_transpose_x_0"), val = bool(true)];
+            bool mh_w_93_transpose_y_0 = const()[name = string("mh_w_93_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 12, 1, 1536]> mh_w_93_cast_fp16 = matmul(transpose_x = mh_w_93_transpose_x_0, transpose_y = mh_w_93_transpose_y_0, x = var_3401_cast_fp16, y = var_3403_cast_fp16)[name = string("mh_w_93_cast_fp16")];
+            tensor<fp16, [1, 12, 1, 1536]> mh_w_cast_fp16 = add(x = mh_w_93_cast_fp16, y = var_297_cast_fp16)[name = string("mh_w_cast_fp16")];
+            tensor<fp16, [1, 12, 1, 1536]> obj_221_cast_fp16 = softmax(axis = var_3225, x = mh_w_cast_fp16)[name = string("obj_221_cast_fp16")];
+            tensor<int32, [4]> var_3412 = const()[name = string("op_3412"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1536]> var_3413_cast_fp16 = reshape(shape = var_3412, x = obj_217_cast_fp16)[name = string("op_3413_cast_fp16")];
+            bool attn_transpose_x_0 = const()[name = string("attn_transpose_x_0"), val = bool(false)];
+            bool attn_transpose_y_0 = const()[name = string("attn_transpose_y_0"), val = bool(true)];
+            tensor<fp16, [1, 12, 64, 1]> attn_cast_fp16 = matmul(transpose_x = attn_transpose_x_0, transpose_y = attn_transpose_y_0, x = var_3413_cast_fp16, y = obj_221_cast_fp16)[name = string("attn_cast_fp16")];
+            tensor<int32, [4]> var_3416 = const()[name = string("op_3416"), val = tensor<int32, [4]>([1, 768, 1, -1])];
+            tensor<fp16, [1, 768, 1, 1]> input_113_cast_fp16 = reshape(shape = var_3416, x = attn_cast_fp16)[name = string("input_113_cast_fp16")];
+            string var_3426_pad_type_0 = const()[name = string("op_3426_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_3426_strides_0 = const()[name = string("op_3426_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3426_pad_0 = const()[name = string("op_3426_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3426_dilations_0 = const()[name = string("op_3426_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_3426_groups_0 = const()[name = string("op_3426_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_11_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(141041856))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(141336832))))[name = string("layers_11_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_11_encoder_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_11_encoder_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(141336960)))];
+            tensor<fp16, [1, 768, 1, 1]> var_3426_cast_fp16 = conv(bias = layers_11_encoder_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_3426_dilations_0, groups = var_3426_groups_0, pad = var_3426_pad_0, pad_type = var_3426_pad_type_0, strides = var_3426_strides_0, weight = layers_11_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_113_cast_fp16)[name = string("op_3426_cast_fp16")];
+            string var_3432_pad_type_0 = const()[name = string("op_3432_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_3432_strides_0 = const()[name = string("op_3432_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3432_pad_0 = const()[name = string("op_3432_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3432_dilations_0 = const()[name = string("op_3432_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_3432_groups_0 = const()[name = string("op_3432_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_11_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(141352000))), nonzero_data = tensor<fp16, [6671]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(141338560))))[name = string("layers_11_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1]> var_3432_cast_fp16 = conv(dilations = var_3432_dilations_0, groups = var_3432_groups_0, pad = var_3432_pad_0, pad_type = var_3432_pad_type_0, strides = var_3432_strides_0, weight = layers_11_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_113_cast_fp16)[name = string("op_3432_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> obj_219_cast_fp16 = add(x = var_3426_cast_fp16, y = var_3432_cast_fp16)[name = string("obj_219_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> inputs_71_cast_fp16 = add(x = inputs_69_cast_fp16, y = obj_219_cast_fp16)[name = string("inputs_71_cast_fp16")];
+            tensor<int32, [1]> out_71_axes_0 = const()[name = string("out_71_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_3446_to_fp16 = const()[name = string("op_3446_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> out_71_cast_fp16 = layer_norm(axes = out_71_axes_0, epsilon = var_3446_to_fp16, x = inputs_71_cast_fp16)[name = string("out_71_cast_fp16")];
+            tensor<fp16, [768]> input_115_gamma_0_to_fp16 = const()[name = string("input_115_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(141425792)))];
+            tensor<fp16, [768]> input_115_beta_0_to_fp16 = const()[name = string("input_115_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(141427392)))];
+            fp16 input_115_epsilon_0_to_fp16 = const()[name = string("input_115_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> input_115_cast_fp16 = batch_norm(beta = input_115_beta_0_to_fp16, epsilon = input_115_epsilon_0_to_fp16, gamma = input_115_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_71_cast_fp16)[name = string("input_115_cast_fp16")];
+            string var_3464_pad_type_0 = const()[name = string("op_3464_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_3464_strides_0 = const()[name = string("op_3464_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3464_pad_0 = const()[name = string("op_3464_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3464_dilations_0 = const()[name = string("op_3464_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_3464_groups_0 = const()[name = string("op_3464_groups_0"), val = int32(1)];
+            tensor<fp16, [3072, 768, 1, 1]> layers_11_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [3072, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(141428992))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(142608704))))[name = string("layers_11_fc1_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [3072]> layers_11_fc1_inlier_module_bias_to_fp16 = const()[name = string("layers_11_fc1_inlier_module_bias_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(142608832)))];
+            tensor<fp16, [1, 3072, 1, 1]> var_3464_cast_fp16 = conv(bias = layers_11_fc1_inlier_module_bias_to_fp16, dilations = var_3464_dilations_0, groups = var_3464_groups_0, pad = var_3464_pad_0, pad_type = var_3464_pad_type_0, strides = var_3464_strides_0, weight = layers_11_fc1_inlier_module_weight_to_fp16_palettized, x = input_115_cast_fp16)[name = string("op_3464_cast_fp16")];
+            string var_3470_pad_type_0 = const()[name = string("op_3470_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_3470_strides_0 = const()[name = string("op_3470_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3470_pad_0 = const()[name = string("op_3470_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3470_dilations_0 = const()[name = string("op_3470_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_3470_groups_0 = const()[name = string("op_3470_groups_0"), val = int32(1)];
+            tensor<fp16, [3072, 768, 1, 1]> layers_11_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [3072, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(142640768))), nonzero_data = tensor<fp16, [12829]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(142615040))))[name = string("layers_11_fc1_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 3072, 1, 1]> var_3470_cast_fp16 = conv(dilations = var_3470_dilations_0, groups = var_3470_groups_0, pad = var_3470_pad_0, pad_type = var_3470_pad_type_0, strides = var_3470_strides_0, weight = layers_11_fc1_outlier_module_weight_to_fp16_sparsified, x = input_115_cast_fp16)[name = string("op_3470_cast_fp16")];
+            tensor<fp16, [1, 3072, 1, 1]> input_117_cast_fp16 = add(x = var_3464_cast_fp16, y = var_3470_cast_fp16)[name = string("input_117_cast_fp16")];
+            string input_mode_0 = const()[name = string("input_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 3072, 1, 1]> input_cast_fp16 = gelu(mode = input_mode_0, x = input_117_cast_fp16)[name = string("input_cast_fp16")];
+            string var_3481_pad_type_0 = const()[name = string("op_3481_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_3481_strides_0 = const()[name = string("op_3481_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3481_pad_0 = const()[name = string("op_3481_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3481_dilations_0 = const()[name = string("op_3481_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_3481_groups_0 = const()[name = string("op_3481_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 3072, 1, 1]> layers_11_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(142935744))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(144115456))))[name = string("layers_11_fc2_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_11_fc2_inlier_module_bias_to_fp16 = const()[name = string("layers_11_fc2_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(144115584)))];
+            tensor<fp16, [1, 768, 1, 1]> var_3481_cast_fp16 = conv(bias = layers_11_fc2_inlier_module_bias_to_fp16, dilations = var_3481_dilations_0, groups = var_3481_groups_0, pad = var_3481_pad_0, pad_type = var_3481_pad_type_0, strides = var_3481_strides_0, weight = layers_11_fc2_inlier_module_weight_to_fp16_palettized, x = input_cast_fp16)[name = string("op_3481_cast_fp16")];
+            string var_3487_pad_type_0 = const()[name = string("op_3487_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_3487_strides_0 = const()[name = string("op_3487_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3487_pad_0 = const()[name = string("op_3487_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3487_dilations_0 = const()[name = string("op_3487_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_3487_groups_0 = const()[name = string("op_3487_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 3072, 1, 1]> layers_11_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(144151104))), nonzero_data = tensor<fp16, [16925]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(144117184))))[name = string("layers_11_fc2_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1]> var_3487_cast_fp16 = conv(dilations = var_3487_dilations_0, groups = var_3487_groups_0, pad = var_3487_pad_0, pad_type = var_3487_pad_type_0, strides = var_3487_strides_0, weight = layers_11_fc2_outlier_module_weight_to_fp16_sparsified, x = input_cast_fp16)[name = string("op_3487_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> hidden_states_25_cast_fp16 = add(x = var_3481_cast_fp16, y = var_3487_cast_fp16)[name = string("hidden_states_25_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> inputs_cast_fp16 = add(x = inputs_71_cast_fp16, y = hidden_states_25_cast_fp16)[name = string("inputs_cast_fp16")];
+            tensor<int32, [1]> out_axes_0 = const()[name = string("out_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_3507_to_fp16 = const()[name = string("op_3507_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> out_cast_fp16 = layer_norm(axes = out_axes_0, epsilon = var_3507_to_fp16, x = inputs_cast_fp16)[name = string("out_cast_fp16")];
+            tensor<fp16, [768]> hidden_states_gamma_0_to_fp16 = const()[name = string("hidden_states_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(144446080)))];
+            tensor<fp16, [768]> hidden_states_beta_0_to_fp16 = const()[name = string("hidden_states_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(144447680)))];
+            fp16 hidden_states_epsilon_0_to_fp16 = const()[name = string("hidden_states_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> hidden_states_cast_fp16 = batch_norm(beta = hidden_states_beta_0_to_fp16, epsilon = hidden_states_epsilon_0_to_fp16, gamma = hidden_states_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_cast_fp16)[name = string("hidden_states_cast_fp16")];
+            tensor<int32, [1]> var_3518_axes_0 = const()[name = string("op_3518_axes_0"), val = tensor<int32, [1]>([2])];
+            tensor<fp16, [1, 768, 1]> var_3518_cast_fp16 = squeeze(axes = var_3518_axes_0, x = hidden_states_cast_fp16)[name = string("op_3518_cast_fp16")];
+            tensor<int32, [3]> var_3521_perm_0 = const()[name = string("op_3521_perm_0"), val = tensor<int32, [3]>([0, 2, 1])];
+            tensor<fp16, [51864]> linear_0_bias_0_to_fp16 = const()[name = string("linear_0_bias_0_to_fp16"), val = tensor<fp16, [51864]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(144449280)))];
+            tensor<fp16, [1, 1, 768]> var_3521_cast_fp16 = transpose(perm = var_3521_perm_0, x = var_3518_cast_fp16)[name = string("transpose_0")];
+            tensor<fp16, [1, 1, 51864]> logits = linear(bias = linear_0_bias_0_to_fp16, weight = embed_tokens_weight_to_fp16, x = var_3521_cast_fp16)[name = string("linear_0_cast_fp16")];
+            int32 var_3525 = const()[name = string("op_3525"), val = int32(1)];
+            bool obj_225_interleave_0 = const()[name = string("obj_225_interleave_0"), val = bool(false)];
+            tensor<fp16, [1, 9216, 1, 1]> key_cache_updates = concat(axis = var_3525, interleave = obj_225_interleave_0, values = (current_key_1_cast_fp16, current_key_3_cast_fp16, current_key_5_cast_fp16, current_key_7_cast_fp16, current_key_9_cast_fp16, current_key_11_cast_fp16, current_key_13_cast_fp16, current_key_15_cast_fp16, current_key_17_cast_fp16, current_key_19_cast_fp16, current_key_21_cast_fp16, current_key_cast_fp16))[name = string("obj_225_cast_fp16")];
+            int32 var_3528 = const()[name = string("op_3528"), val = int32(1)];
+            bool obj_227_interleave_0 = const()[name = string("obj_227_interleave_0"), val = bool(false)];
+            tensor<fp16, [1, 9216, 1, 1]> value_cache_updates = concat(axis = var_3528, interleave = obj_227_interleave_0, values = (current_value_1_cast_fp16, current_value_3_cast_fp16, current_value_5_cast_fp16, current_value_7_cast_fp16, current_value_9_cast_fp16, current_value_11_cast_fp16, current_value_13_cast_fp16, current_value_15_cast_fp16, current_value_17_cast_fp16, current_value_19_cast_fp16, current_value_21_cast_fp16, current_value_cast_fp16))[name = string("obj_227_cast_fp16")];
+            tensor<int32, [4]> var_3539_begin_0 = const()[name = string("op_3539_begin_0"), val = tensor<int32, [4]>([0, 6, 0, 0])];
+            tensor<int32, [4]> var_3539_end_0 = const()[name = string("op_3539_end_0"), val = tensor<int32, [4]>([1, 7, 1, 1536])];
+            tensor<bool, [4]> var_3539_end_mask_0 = const()[name = string("op_3539_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 1, 1, 1536]> var_3539_cast_fp16 = slice_by_index(begin = var_3539_begin_0, end = var_3539_end_0, end_mask = var_3539_end_mask_0, x = obj_131_cast_fp16)[name = string("op_3539_cast_fp16")];
+            tensor<int32, [4]> var_3542_begin_0 = const()[name = string("op_3542_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_3542_end_0 = const()[name = string("op_3542_end_0"), val = tensor<int32, [4]>([1, 1, 1, 1536])];
+            tensor<bool, [4]> var_3542_end_mask_0 = const()[name = string("op_3542_end_mask_0"), val = tensor<bool, [4]>([true, true, false, true])];
+            tensor<bool, [4]> var_3542_squeeze_mask_0 = const()[name = string("op_3542_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, true, false])];
+            tensor<fp16, [1, 1, 1536]> var_3542_cast_fp16 = slice_by_index(begin = var_3542_begin_0, end = var_3542_end_0, end_mask = var_3542_end_mask_0, squeeze_mask = var_3542_squeeze_mask_0, x = var_3539_cast_fp16)[name = string("op_3542_cast_fp16")];
+            tensor<int32, [4]> var_3557_begin_0 = const()[name = string("op_3557_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_3557_end_0 = const()[name = string("op_3557_end_0"), val = tensor<int32, [4]>([1, 1, 1, 1536])];
+            tensor<bool, [4]> var_3557_end_mask_0 = const()[name = string("op_3557_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 1, 1, 1536]> var_3557_cast_fp16 = slice_by_index(begin = var_3557_begin_0, end = var_3557_end_0, end_mask = var_3557_end_mask_0, x = obj_149_cast_fp16)[name = string("op_3557_cast_fp16")];
+            tensor<int32, [4]> var_3560_begin_0 = const()[name = string("op_3560_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_3560_end_0 = const()[name = string("op_3560_end_0"), val = tensor<int32, [4]>([1, 1, 1, 1536])];
+            tensor<bool, [4]> var_3560_end_mask_0 = const()[name = string("op_3560_end_mask_0"), val = tensor<bool, [4]>([true, true, false, true])];
+            tensor<bool, [4]> var_3560_squeeze_mask_0 = const()[name = string("op_3560_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, true, false])];
+            tensor<fp16, [1, 1, 1536]> var_3560_cast_fp16 = slice_by_index(begin = var_3560_begin_0, end = var_3560_end_0, end_mask = var_3560_end_mask_0, squeeze_mask = var_3560_squeeze_mask_0, x = var_3557_cast_fp16)[name = string("op_3560_cast_fp16")];
+            tensor<int32, [4]> var_3575_begin_0 = const()[name = string("op_3575_begin_0"), val = tensor<int32, [4]>([0, 3, 0, 0])];
+            tensor<int32, [4]> var_3575_end_0 = const()[name = string("op_3575_end_0"), val = tensor<int32, [4]>([1, 4, 1, 1536])];
+            tensor<bool, [4]> var_3575_end_mask_0 = const()[name = string("op_3575_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 1, 1, 1536]> var_3575_cast_fp16 = slice_by_index(begin = var_3575_begin_0, end = var_3575_end_0, end_mask = var_3575_end_mask_0, x = obj_149_cast_fp16)[name = string("op_3575_cast_fp16")];
+            tensor<int32, [4]> var_3578_begin_0 = const()[name = string("op_3578_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_3578_end_0 = const()[name = string("op_3578_end_0"), val = tensor<int32, [4]>([1, 1, 1, 1536])];
+            tensor<bool, [4]> var_3578_end_mask_0 = const()[name = string("op_3578_end_mask_0"), val = tensor<bool, [4]>([true, true, false, true])];
+            tensor<bool, [4]> var_3578_squeeze_mask_0 = const()[name = string("op_3578_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, true, false])];
+            tensor<fp16, [1, 1, 1536]> var_3578_cast_fp16 = slice_by_index(begin = var_3578_begin_0, end = var_3578_end_0, end_mask = var_3578_end_mask_0, squeeze_mask = var_3578_squeeze_mask_0, x = var_3575_cast_fp16)[name = string("op_3578_cast_fp16")];
+            tensor<int32, [4]> var_3593_begin_0 = const()[name = string("op_3593_begin_0"), val = tensor<int32, [4]>([0, 8, 0, 0])];
+            tensor<int32, [4]> var_3593_end_0 = const()[name = string("op_3593_end_0"), val = tensor<int32, [4]>([1, 9, 1, 1536])];
+            tensor<bool, [4]> var_3593_end_mask_0 = const()[name = string("op_3593_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 1, 1, 1536]> var_3593_cast_fp16 = slice_by_index(begin = var_3593_begin_0, end = var_3593_end_0, end_mask = var_3593_end_mask_0, x = obj_149_cast_fp16)[name = string("op_3593_cast_fp16")];
+            tensor<int32, [4]> var_3596_begin_0 = const()[name = string("op_3596_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_3596_end_0 = const()[name = string("op_3596_end_0"), val = tensor<int32, [4]>([1, 1, 1, 1536])];
+            tensor<bool, [4]> var_3596_end_mask_0 = const()[name = string("op_3596_end_mask_0"), val = tensor<bool, [4]>([true, true, false, true])];
+            tensor<bool, [4]> var_3596_squeeze_mask_0 = const()[name = string("op_3596_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, true, false])];
+            tensor<fp16, [1, 1, 1536]> var_3596_cast_fp16 = slice_by_index(begin = var_3596_begin_0, end = var_3596_end_0, end_mask = var_3596_end_mask_0, squeeze_mask = var_3596_squeeze_mask_0, x = var_3593_cast_fp16)[name = string("op_3596_cast_fp16")];
+            tensor<int32, [4]> var_3611_begin_0 = const()[name = string("op_3611_begin_0"), val = tensor<int32, [4]>([0, 2, 0, 0])];
+            tensor<int32, [4]> var_3611_end_0 = const()[name = string("op_3611_end_0"), val = tensor<int32, [4]>([1, 3, 1, 1536])];
+            tensor<bool, [4]> var_3611_end_mask_0 = const()[name = string("op_3611_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 1, 1, 1536]> var_3611_cast_fp16 = slice_by_index(begin = var_3611_begin_0, end = var_3611_end_0, end_mask = var_3611_end_mask_0, x = obj_167_cast_fp16)[name = string("op_3611_cast_fp16")];
+            tensor<int32, [4]> var_3614_begin_0 = const()[name = string("op_3614_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_3614_end_0 = const()[name = string("op_3614_end_0"), val = tensor<int32, [4]>([1, 1, 1, 1536])];
+            tensor<bool, [4]> var_3614_end_mask_0 = const()[name = string("op_3614_end_mask_0"), val = tensor<bool, [4]>([true, true, false, true])];
+            tensor<bool, [4]> var_3614_squeeze_mask_0 = const()[name = string("op_3614_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, true, false])];
+            tensor<fp16, [1, 1, 1536]> var_3614_cast_fp16 = slice_by_index(begin = var_3614_begin_0, end = var_3614_end_0, end_mask = var_3614_end_mask_0, squeeze_mask = var_3614_squeeze_mask_0, x = var_3611_cast_fp16)[name = string("op_3614_cast_fp16")];
+            tensor<int32, [4]> var_3629_begin_0 = const()[name = string("op_3629_begin_0"), val = tensor<int32, [4]>([0, 5, 0, 0])];
+            tensor<int32, [4]> var_3629_end_0 = const()[name = string("op_3629_end_0"), val = tensor<int32, [4]>([1, 6, 1, 1536])];
+            tensor<bool, [4]> var_3629_end_mask_0 = const()[name = string("op_3629_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 1, 1, 1536]> var_3629_cast_fp16 = slice_by_index(begin = var_3629_begin_0, end = var_3629_end_0, end_mask = var_3629_end_mask_0, x = obj_167_cast_fp16)[name = string("op_3629_cast_fp16")];
+            tensor<int32, [4]> var_3632_begin_0 = const()[name = string("op_3632_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_3632_end_0 = const()[name = string("op_3632_end_0"), val = tensor<int32, [4]>([1, 1, 1, 1536])];
+            tensor<bool, [4]> var_3632_end_mask_0 = const()[name = string("op_3632_end_mask_0"), val = tensor<bool, [4]>([true, true, false, true])];
+            tensor<bool, [4]> var_3632_squeeze_mask_0 = const()[name = string("op_3632_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, true, false])];
+            tensor<fp16, [1, 1, 1536]> var_3632_cast_fp16 = slice_by_index(begin = var_3632_begin_0, end = var_3632_end_0, end_mask = var_3632_end_mask_0, squeeze_mask = var_3632_squeeze_mask_0, x = var_3629_cast_fp16)[name = string("op_3632_cast_fp16")];
+            tensor<int32, [4]> var_3647_begin_0 = const()[name = string("op_3647_begin_0"), val = tensor<int32, [4]>([0, 7, 0, 0])];
+            tensor<int32, [4]> var_3647_end_0 = const()[name = string("op_3647_end_0"), val = tensor<int32, [4]>([1, 8, 1, 1536])];
+            tensor<bool, [4]> var_3647_end_mask_0 = const()[name = string("op_3647_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 1, 1, 1536]> var_3647_cast_fp16 = slice_by_index(begin = var_3647_begin_0, end = var_3647_end_0, end_mask = var_3647_end_mask_0, x = obj_167_cast_fp16)[name = string("op_3647_cast_fp16")];
+            tensor<int32, [4]> var_3650_begin_0 = const()[name = string("op_3650_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_3650_end_0 = const()[name = string("op_3650_end_0"), val = tensor<int32, [4]>([1, 1, 1, 1536])];
+            tensor<bool, [4]> var_3650_end_mask_0 = const()[name = string("op_3650_end_mask_0"), val = tensor<bool, [4]>([true, true, false, true])];
+            tensor<bool, [4]> var_3650_squeeze_mask_0 = const()[name = string("op_3650_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, true, false])];
+            tensor<fp16, [1, 1, 1536]> var_3650_cast_fp16 = slice_by_index(begin = var_3650_begin_0, end = var_3650_end_0, end_mask = var_3650_end_mask_0, squeeze_mask = var_3650_squeeze_mask_0, x = var_3647_cast_fp16)[name = string("op_3650_cast_fp16")];
+            tensor<int32, [4]> var_3665_begin_0 = const()[name = string("op_3665_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_3665_end_0 = const()[name = string("op_3665_end_0"), val = tensor<int32, [4]>([1, 1, 1, 1536])];
+            tensor<bool, [4]> var_3665_end_mask_0 = const()[name = string("op_3665_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 1, 1, 1536]> var_3665_cast_fp16 = slice_by_index(begin = var_3665_begin_0, end = var_3665_end_0, end_mask = var_3665_end_mask_0, x = obj_185_cast_fp16)[name = string("op_3665_cast_fp16")];
+            tensor<int32, [4]> var_3668_begin_0 = const()[name = string("op_3668_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_3668_end_0 = const()[name = string("op_3668_end_0"), val = tensor<int32, [4]>([1, 1, 1, 1536])];
+            tensor<bool, [4]> var_3668_end_mask_0 = const()[name = string("op_3668_end_mask_0"), val = tensor<bool, [4]>([true, true, false, true])];
+            tensor<bool, [4]> var_3668_squeeze_mask_0 = const()[name = string("op_3668_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, true, false])];
+            tensor<fp16, [1, 1, 1536]> var_3668_cast_fp16 = slice_by_index(begin = var_3668_begin_0, end = var_3668_end_0, end_mask = var_3668_end_mask_0, squeeze_mask = var_3668_squeeze_mask_0, x = var_3665_cast_fp16)[name = string("op_3668_cast_fp16")];
+            tensor<int32, [4]> var_3683_begin_0 = const()[name = string("op_3683_begin_0"), val = tensor<int32, [4]>([0, 4, 0, 0])];
+            tensor<int32, [4]> var_3683_end_0 = const()[name = string("op_3683_end_0"), val = tensor<int32, [4]>([1, 5, 1, 1536])];
+            tensor<bool, [4]> var_3683_end_mask_0 = const()[name = string("op_3683_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 1, 1, 1536]> var_3683_cast_fp16 = slice_by_index(begin = var_3683_begin_0, end = var_3683_end_0, end_mask = var_3683_end_mask_0, x = obj_185_cast_fp16)[name = string("op_3683_cast_fp16")];
+            tensor<int32, [4]> var_3686_begin_0 = const()[name = string("op_3686_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_3686_end_0 = const()[name = string("op_3686_end_0"), val = tensor<int32, [4]>([1, 1, 1, 1536])];
+            tensor<bool, [4]> var_3686_end_mask_0 = const()[name = string("op_3686_end_mask_0"), val = tensor<bool, [4]>([true, true, false, true])];
+            tensor<bool, [4]> var_3686_squeeze_mask_0 = const()[name = string("op_3686_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, true, false])];
+            tensor<fp16, [1, 1, 1536]> var_3686_cast_fp16 = slice_by_index(begin = var_3686_begin_0, end = var_3686_end_0, end_mask = var_3686_end_mask_0, squeeze_mask = var_3686_squeeze_mask_0, x = var_3683_cast_fp16)[name = string("op_3686_cast_fp16")];
+            tensor<int32, [4]> var_3701_begin_0 = const()[name = string("op_3701_begin_0"), val = tensor<int32, [4]>([0, 8, 0, 0])];
+            tensor<int32, [4]> var_3701_end_0 = const()[name = string("op_3701_end_0"), val = tensor<int32, [4]>([1, 9, 1, 1536])];
+            tensor<bool, [4]> var_3701_end_mask_0 = const()[name = string("op_3701_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 1, 1, 1536]> var_3701_cast_fp16 = slice_by_index(begin = var_3701_begin_0, end = var_3701_end_0, end_mask = var_3701_end_mask_0, x = obj_185_cast_fp16)[name = string("op_3701_cast_fp16")];
+            tensor<int32, [4]> var_3704_begin_0 = const()[name = string("op_3704_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_3704_end_0 = const()[name = string("op_3704_end_0"), val = tensor<int32, [4]>([1, 1, 1, 1536])];
+            tensor<bool, [4]> var_3704_end_mask_0 = const()[name = string("op_3704_end_mask_0"), val = tensor<bool, [4]>([true, true, false, true])];
+            tensor<bool, [4]> var_3704_squeeze_mask_0 = const()[name = string("op_3704_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, true, false])];
+            tensor<fp16, [1, 1, 1536]> var_3704_cast_fp16 = slice_by_index(begin = var_3704_begin_0, end = var_3704_end_0, end_mask = var_3704_end_mask_0, squeeze_mask = var_3704_squeeze_mask_0, x = var_3701_cast_fp16)[name = string("op_3704_cast_fp16")];
+            tensor<int32, [4]> var_3719_begin_0 = const()[name = string("op_3719_begin_0"), val = tensor<int32, [4]>([0, 10, 0, 0])];
+            tensor<int32, [4]> var_3719_end_0 = const()[name = string("op_3719_end_0"), val = tensor<int32, [4]>([1, 11, 1, 1536])];
+            tensor<bool, [4]> var_3719_end_mask_0 = const()[name = string("op_3719_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 1, 1, 1536]> var_3719_cast_fp16 = slice_by_index(begin = var_3719_begin_0, end = var_3719_end_0, end_mask = var_3719_end_mask_0, x = obj_185_cast_fp16)[name = string("op_3719_cast_fp16")];
+            tensor<int32, [4]> var_3722_begin_0 = const()[name = string("op_3722_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_3722_end_0 = const()[name = string("op_3722_end_0"), val = tensor<int32, [4]>([1, 1, 1, 1536])];
+            tensor<bool, [4]> var_3722_end_mask_0 = const()[name = string("op_3722_end_mask_0"), val = tensor<bool, [4]>([true, true, false, true])];
+            tensor<bool, [4]> var_3722_squeeze_mask_0 = const()[name = string("op_3722_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, true, false])];
+            tensor<fp16, [1, 1, 1536]> var_3722_cast_fp16 = slice_by_index(begin = var_3722_begin_0, end = var_3722_end_0, end_mask = var_3722_end_mask_0, squeeze_mask = var_3722_squeeze_mask_0, x = var_3719_cast_fp16)[name = string("op_3722_cast_fp16")];
+            tensor<int32, [4]> var_3737_begin_0 = const()[name = string("op_3737_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_3737_end_0 = const()[name = string("op_3737_end_0"), val = tensor<int32, [4]>([1, 1, 1, 1536])];
+            tensor<bool, [4]> var_3737_end_mask_0 = const()[name = string("op_3737_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 1, 1, 1536]> var_3737_cast_fp16 = slice_by_index(begin = var_3737_begin_0, end = var_3737_end_0, end_mask = var_3737_end_mask_0, x = obj_203_cast_fp16)[name = string("op_3737_cast_fp16")];
+            tensor<int32, [4]> var_3740_begin_0 = const()[name = string("op_3740_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_3740_end_0 = const()[name = string("op_3740_end_0"), val = tensor<int32, [4]>([1, 1, 1, 1536])];
+            tensor<bool, [4]> var_3740_end_mask_0 = const()[name = string("op_3740_end_mask_0"), val = tensor<bool, [4]>([true, true, false, true])];
+            tensor<bool, [4]> var_3740_squeeze_mask_0 = const()[name = string("op_3740_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, true, false])];
+            tensor<fp16, [1, 1, 1536]> var_3740_cast_fp16 = slice_by_index(begin = var_3740_begin_0, end = var_3740_end_0, end_mask = var_3740_end_mask_0, squeeze_mask = var_3740_squeeze_mask_0, x = var_3737_cast_fp16)[name = string("op_3740_cast_fp16")];
+            tensor<int32, [4]> var_3755_begin_0 = const()[name = string("op_3755_begin_0"), val = tensor<int32, [4]>([0, 1, 0, 0])];
+            tensor<int32, [4]> var_3755_end_0 = const()[name = string("op_3755_end_0"), val = tensor<int32, [4]>([1, 2, 1, 1536])];
+            tensor<bool, [4]> var_3755_end_mask_0 = const()[name = string("op_3755_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 1, 1, 1536]> var_3755_cast_fp16 = slice_by_index(begin = var_3755_begin_0, end = var_3755_end_0, end_mask = var_3755_end_mask_0, x = obj_203_cast_fp16)[name = string("op_3755_cast_fp16")];
+            tensor<int32, [4]> var_3758_begin_0 = const()[name = string("op_3758_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_3758_end_0 = const()[name = string("op_3758_end_0"), val = tensor<int32, [4]>([1, 1, 1, 1536])];
+            tensor<bool, [4]> var_3758_end_mask_0 = const()[name = string("op_3758_end_mask_0"), val = tensor<bool, [4]>([true, true, false, true])];
+            tensor<bool, [4]> var_3758_squeeze_mask_0 = const()[name = string("op_3758_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, true, false])];
+            tensor<fp16, [1, 1, 1536]> var_3758_cast_fp16 = slice_by_index(begin = var_3758_begin_0, end = var_3758_end_0, end_mask = var_3758_end_mask_0, squeeze_mask = var_3758_squeeze_mask_0, x = var_3755_cast_fp16)[name = string("op_3758_cast_fp16")];
+            tensor<int32, [4]> var_3773_begin_0 = const()[name = string("op_3773_begin_0"), val = tensor<int32, [4]>([0, 2, 0, 0])];
+            tensor<int32, [4]> var_3773_end_0 = const()[name = string("op_3773_end_0"), val = tensor<int32, [4]>([1, 3, 1, 1536])];
+            tensor<bool, [4]> var_3773_end_mask_0 = const()[name = string("op_3773_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 1, 1, 1536]> var_3773_cast_fp16 = slice_by_index(begin = var_3773_begin_0, end = var_3773_end_0, end_mask = var_3773_end_mask_0, x = obj_203_cast_fp16)[name = string("op_3773_cast_fp16")];
+            tensor<int32, [4]> var_3776_begin_0 = const()[name = string("op_3776_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_3776_end_0 = const()[name = string("op_3776_end_0"), val = tensor<int32, [4]>([1, 1, 1, 1536])];
+            tensor<bool, [4]> var_3776_end_mask_0 = const()[name = string("op_3776_end_mask_0"), val = tensor<bool, [4]>([true, true, false, true])];
+            tensor<bool, [4]> var_3776_squeeze_mask_0 = const()[name = string("op_3776_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, true, false])];
+            tensor<fp16, [1, 1, 1536]> var_3776_cast_fp16 = slice_by_index(begin = var_3776_begin_0, end = var_3776_end_0, end_mask = var_3776_end_mask_0, squeeze_mask = var_3776_squeeze_mask_0, x = var_3773_cast_fp16)[name = string("op_3776_cast_fp16")];
+            tensor<int32, [4]> var_3791_begin_0 = const()[name = string("op_3791_begin_0"), val = tensor<int32, [4]>([0, 3, 0, 0])];
+            tensor<int32, [4]> var_3791_end_0 = const()[name = string("op_3791_end_0"), val = tensor<int32, [4]>([1, 4, 1, 1536])];
+            tensor<bool, [4]> var_3791_end_mask_0 = const()[name = string("op_3791_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 1, 1, 1536]> var_3791_cast_fp16 = slice_by_index(begin = var_3791_begin_0, end = var_3791_end_0, end_mask = var_3791_end_mask_0, x = obj_203_cast_fp16)[name = string("op_3791_cast_fp16")];
+            tensor<int32, [4]> var_3794_begin_0 = const()[name = string("op_3794_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_3794_end_0 = const()[name = string("op_3794_end_0"), val = tensor<int32, [4]>([1, 1, 1, 1536])];
+            tensor<bool, [4]> var_3794_end_mask_0 = const()[name = string("op_3794_end_mask_0"), val = tensor<bool, [4]>([true, true, false, true])];
+            tensor<bool, [4]> var_3794_squeeze_mask_0 = const()[name = string("op_3794_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, true, false])];
+            tensor<fp16, [1, 1, 1536]> var_3794_cast_fp16 = slice_by_index(begin = var_3794_begin_0, end = var_3794_end_0, end_mask = var_3794_end_mask_0, squeeze_mask = var_3794_squeeze_mask_0, x = var_3791_cast_fp16)[name = string("op_3794_cast_fp16")];
+            tensor<int32, [4]> var_3809_begin_0 = const()[name = string("op_3809_begin_0"), val = tensor<int32, [4]>([0, 6, 0, 0])];
+            tensor<int32, [4]> var_3809_end_0 = const()[name = string("op_3809_end_0"), val = tensor<int32, [4]>([1, 7, 1, 1536])];
+            tensor<bool, [4]> var_3809_end_mask_0 = const()[name = string("op_3809_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 1, 1, 1536]> var_3809_cast_fp16 = slice_by_index(begin = var_3809_begin_0, end = var_3809_end_0, end_mask = var_3809_end_mask_0, x = obj_203_cast_fp16)[name = string("op_3809_cast_fp16")];
+            tensor<int32, [4]> var_3812_begin_0 = const()[name = string("op_3812_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_3812_end_0 = const()[name = string("op_3812_end_0"), val = tensor<int32, [4]>([1, 1, 1, 1536])];
+            tensor<bool, [4]> var_3812_end_mask_0 = const()[name = string("op_3812_end_mask_0"), val = tensor<bool, [4]>([true, true, false, true])];
+            tensor<bool, [4]> var_3812_squeeze_mask_0 = const()[name = string("op_3812_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, true, false])];
+            tensor<fp16, [1, 1, 1536]> var_3812_cast_fp16 = slice_by_index(begin = var_3812_begin_0, end = var_3812_end_0, end_mask = var_3812_end_mask_0, squeeze_mask = var_3812_squeeze_mask_0, x = var_3809_cast_fp16)[name = string("op_3812_cast_fp16")];
+            tensor<int32, [4]> var_3827_begin_0 = const()[name = string("op_3827_begin_0"), val = tensor<int32, [4]>([0, 11, 0, 0])];
+            tensor<int32, [4]> var_3827_end_0 = const()[name = string("op_3827_end_0"), val = tensor<int32, [4]>([1, 12, 1, 1536])];
+            tensor<bool, [4]> var_3827_end_mask_0 = const()[name = string("op_3827_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 1, 1, 1536]> var_3827_cast_fp16 = slice_by_index(begin = var_3827_begin_0, end = var_3827_end_0, end_mask = var_3827_end_mask_0, x = obj_203_cast_fp16)[name = string("op_3827_cast_fp16")];
+            tensor<int32, [4]> var_3830_begin_0 = const()[name = string("op_3830_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_3830_end_0 = const()[name = string("op_3830_end_0"), val = tensor<int32, [4]>([1, 1, 1, 1536])];
+            tensor<bool, [4]> var_3830_end_mask_0 = const()[name = string("op_3830_end_mask_0"), val = tensor<bool, [4]>([true, true, false, true])];
+            tensor<bool, [4]> var_3830_squeeze_mask_0 = const()[name = string("op_3830_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, true, false])];
+            tensor<fp16, [1, 1, 1536]> var_3830_cast_fp16 = slice_by_index(begin = var_3830_begin_0, end = var_3830_end_0, end_mask = var_3830_end_mask_0, squeeze_mask = var_3830_squeeze_mask_0, x = var_3827_cast_fp16)[name = string("op_3830_cast_fp16")];
+            tensor<int32, [4]> var_3845_begin_0 = const()[name = string("op_3845_begin_0"), val = tensor<int32, [4]>([0, 2, 0, 0])];
+            tensor<int32, [4]> var_3845_end_0 = const()[name = string("op_3845_end_0"), val = tensor<int32, [4]>([1, 3, 1, 1536])];
+            tensor<bool, [4]> var_3845_end_mask_0 = const()[name = string("op_3845_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 1, 1, 1536]> var_3845_cast_fp16 = slice_by_index(begin = var_3845_begin_0, end = var_3845_end_0, end_mask = var_3845_end_mask_0, x = obj_221_cast_fp16)[name = string("op_3845_cast_fp16")];
+            tensor<int32, [4]> var_3848_begin_0 = const()[name = string("op_3848_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_3848_end_0 = const()[name = string("op_3848_end_0"), val = tensor<int32, [4]>([1, 1, 1, 1536])];
+            tensor<bool, [4]> var_3848_end_mask_0 = const()[name = string("op_3848_end_mask_0"), val = tensor<bool, [4]>([true, true, false, true])];
+            tensor<bool, [4]> var_3848_squeeze_mask_0 = const()[name = string("op_3848_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, true, false])];
+            tensor<fp16, [1, 1, 1536]> var_3848_cast_fp16 = slice_by_index(begin = var_3848_begin_0, end = var_3848_end_0, end_mask = var_3848_end_mask_0, squeeze_mask = var_3848_squeeze_mask_0, x = var_3845_cast_fp16)[name = string("op_3848_cast_fp16")];
+            tensor<int32, [4]> var_3863_begin_0 = const()[name = string("op_3863_begin_0"), val = tensor<int32, [4]>([0, 4, 0, 0])];
+            tensor<int32, [4]> var_3863_end_0 = const()[name = string("op_3863_end_0"), val = tensor<int32, [4]>([1, 5, 1, 1536])];
+            tensor<bool, [4]> var_3863_end_mask_0 = const()[name = string("op_3863_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 1, 1, 1536]> var_3863_cast_fp16 = slice_by_index(begin = var_3863_begin_0, end = var_3863_end_0, end_mask = var_3863_end_mask_0, x = obj_221_cast_fp16)[name = string("op_3863_cast_fp16")];
+            tensor<int32, [4]> var_3866_begin_0 = const()[name = string("op_3866_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_3866_end_0 = const()[name = string("op_3866_end_0"), val = tensor<int32, [4]>([1, 1, 1, 1536])];
+            tensor<bool, [4]> var_3866_end_mask_0 = const()[name = string("op_3866_end_mask_0"), val = tensor<bool, [4]>([true, true, false, true])];
+            tensor<bool, [4]> var_3866_squeeze_mask_0 = const()[name = string("op_3866_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, true, false])];
+            tensor<fp16, [1, 1, 1536]> var_3866_cast_fp16 = slice_by_index(begin = var_3866_begin_0, end = var_3866_end_0, end_mask = var_3866_end_mask_0, squeeze_mask = var_3866_squeeze_mask_0, x = var_3863_cast_fp16)[name = string("op_3866_cast_fp16")];
+            int32 var_3873 = const()[name = string("op_3873"), val = int32(1)];
+            bool var_3874_interleave_0 = const()[name = string("op_3874_interleave_0"), val = bool(false)];
+            tensor<fp16, [1, 19, 1536]> var_3874_cast_fp16 = concat(axis = var_3873, interleave = var_3874_interleave_0, values = (var_3542_cast_fp16, var_3560_cast_fp16, var_3578_cast_fp16, var_3596_cast_fp16, var_3614_cast_fp16, var_3632_cast_fp16, var_3650_cast_fp16, var_3668_cast_fp16, var_3686_cast_fp16, var_3704_cast_fp16, var_3722_cast_fp16, var_3740_cast_fp16, var_3758_cast_fp16, var_3776_cast_fp16, var_3794_cast_fp16, var_3812_cast_fp16, var_3830_cast_fp16, var_3848_cast_fp16, var_3866_cast_fp16))[name = string("op_3874_cast_fp16")];
+            bool var_3877 = const()[name = string("op_3877"), val = bool(false)];
+            tensor<int32, [1]> obj_axes_0 = const()[name = string("obj_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1536]> alignment_heads_weights = reduce_mean(axes = obj_axes_0, keep_dims = var_3877, x = var_3874_cast_fp16)[name = string("obj_cast_fp16")];
+        } -> (logits, key_cache_updates, value_cache_updates, alignment_heads_weights);
+}
\ No newline at end of file
diff --git a/openai_whisper-small.en_217MB/TextDecoder.mlmodelc/model.mlmodel b/openai_whisper-small.en_217MB/TextDecoder.mlmodelc/model.mlmodel
new file mode 100644
index 0000000000000000000000000000000000000000..9f17c163b7d435ccbc2269b5b9cc7dd1a6315d2c
--- /dev/null
+++ b/openai_whisper-small.en_217MB/TextDecoder.mlmodelc/model.mlmodel
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3523fca50d554f8489c77b650a2ad67bbae52dc273cc7ae065f42d9ed203a61a
+size 471306
diff --git a/openai_whisper-small.en_217MB/TextDecoder.mlmodelc/weights/weight.bin b/openai_whisper-small.en_217MB/TextDecoder.mlmodelc/weights/weight.bin
new file mode 100644
index 0000000000000000000000000000000000000000..b13fbdd99930849fa69f196ca3c98327f957d309
--- /dev/null
+++ b/openai_whisper-small.en_217MB/TextDecoder.mlmodelc/weights/weight.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d081733de41795cc1b645efddb2e087d0aee20ec0d4827de1afe9d12968fddf2
+size 144553072
diff --git a/openai_whisper-small_216MB/AudioEncoder.mlmodelc/analytics/coremldata.bin b/openai_whisper-small_216MB/AudioEncoder.mlmodelc/analytics/coremldata.bin
new file mode 100644
index 0000000000000000000000000000000000000000..d439fcdfa47a94d75a27aad156a24cee9dec53fd
--- /dev/null
+++ b/openai_whisper-small_216MB/AudioEncoder.mlmodelc/analytics/coremldata.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9013b810ca238cedd09069c765cdf0937f84e1ed74c90df4c4d05b602fdcc7ac
+size 243
diff --git a/openai_whisper-small_216MB/AudioEncoder.mlmodelc/coremldata.bin b/openai_whisper-small_216MB/AudioEncoder.mlmodelc/coremldata.bin
new file mode 100644
index 0000000000000000000000000000000000000000..4e7a2bb9e7ef3870c50986867ffd5a6788e5ed84
--- /dev/null
+++ b/openai_whisper-small_216MB/AudioEncoder.mlmodelc/coremldata.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a078e65c9369ce8a4a687a2bbb0a8befbd4ed459250c0442176824906fa95ee1
+size 433
diff --git a/openai_whisper-small_216MB/AudioEncoder.mlmodelc/metadata.json b/openai_whisper-small_216MB/AudioEncoder.mlmodelc/metadata.json
new file mode 100644
index 0000000000000000000000000000000000000000..3498353a11aebfdf53597f666306084d60002ec2
--- /dev/null
+++ b/openai_whisper-small_216MB/AudioEncoder.mlmodelc/metadata.json
@@ -0,0 +1,92 @@
+[
+  {
+    "metadataOutputVersion" : "3.0",
+    "storagePrecision" : "Mixed (Float16, Palettized (4 bits), Sparse)",
+    "outputSchema" : [
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1 × 768 × 1 × 1500)",
+        "shortDescription" : "",
+        "shape" : "[1, 768, 1, 1500]",
+        "name" : "encoder_output_embeds",
+        "type" : "MultiArray"
+      },
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 12 × 768 × 1 × 1536)",
+        "shortDescription" : "",
+        "shape" : "[12, 768, 1, 1536]",
+        "name" : "encoder_attn_key_cache",
+        "type" : "MultiArray"
+      },
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 12 × 768 × 1 × 1536)",
+        "shortDescription" : "",
+        "shape" : "[12, 768, 1, 1536]",
+        "name" : "encoder_attn_value_cache",
+        "type" : "MultiArray"
+      }
+    ],
+    "modelParameters" : [
+
+    ],
+    "specificationVersion" : 9,
+    "mlProgramOperationTypeHistogram" : {
+      "Ios18.constexprLutToDense" : 98,
+      "Ios18.constexprSparseToDense" : 96,
+      "Ios18.conv" : 196,
+      "Ios18.matmul" : 24,
+      "Ios18.batchNorm" : 25,
+      "Pad" : 2,
+      "Ios18.gelu" : 14,
+      "Ios18.concat" : 2,
+      "Ios18.add" : 123,
+      "Ios18.softmax" : 12,
+      "Ios18.layerNorm" : 25,
+      "Ios18.reshape" : 48,
+      "Ios18.mul" : 12
+    },
+    "computePrecision" : "Mixed (Float16, Int32)",
+    "isUpdatable" : "0",
+    "stateSchema" : [
+
+    ],
+    "availability" : {
+      "macOS" : "15.0",
+      "tvOS" : "18.0",
+      "visionOS" : "2.0",
+      "watchOS" : "11.0",
+      "iOS" : "18.0",
+      "macCatalyst" : "18.0"
+    },
+    "modelType" : {
+      "name" : "MLModelType_mlProgram"
+    },
+    "userDefinedMetadata" : {
+      "com.github.apple.coremltools.source_dialect" : "TorchScript",
+      "com.github.apple.coremltools.source" : "torch==2.5.1",
+      "com.github.apple.coremltools.version" : "8.0"
+    },
+    "inputSchema" : [
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1 × 80 × 1 × 3000)",
+        "shortDescription" : "",
+        "shape" : "[1, 80, 1, 3000]",
+        "name" : "melspectrogram_features",
+        "type" : "MultiArray"
+      }
+    ],
+    "generatedClassName" : "AudioEncoderStateful_mixedBitPalettized_4_bit",
+    "method" : "predict"
+  }
+]
\ No newline at end of file
diff --git a/openai_whisper-small_216MB/AudioEncoder.mlmodelc/model.mil b/openai_whisper-small_216MB/AudioEncoder.mlmodelc/model.mil
new file mode 100644
index 0000000000000000000000000000000000000000..f3d41bd3de20bc123b15479fa7b13dafcea071c8
--- /dev/null
+++ b/openai_whisper-small_216MB/AudioEncoder.mlmodelc/model.mil
@@ -0,0 +1,2011 @@
+program(1.3)
+[buildInfo = dict<string, string>({{"coremlc-component-MIL", "3400.43.1"}, {"coremlc-version", "3400.58.2"}})]
+{
+    func main<ios18>(tensor<fp16, [1, 80, 1, 3000]> melspectrogram_features) {
+            string var_100_pad_type_0 = const()[name = string("op_100_pad_type_0"), val = string("custom")];
+            tensor<int32, [4]> var_100_pad_0 = const()[name = string("op_100_pad_0"), val = tensor<int32, [4]>([0, 0, 1, 1])];
+            tensor<int32, [2]> var_100_strides_0 = const()[name = string("op_100_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_100_dilations_0 = const()[name = string("op_100_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_100_groups_0 = const()[name = string("op_100_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 80, 1, 3]> var_69_to_fp16 = const()[name = string("op_69_to_fp16"), val = tensor<fp16, [768, 80, 1, 3]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64)))];
+            tensor<fp16, [768]> var_81_to_fp16 = const()[name = string("op_81_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(368768)))];
+            tensor<fp16, [1, 768, 1, 3000]> var_100_cast_fp16 = conv(bias = var_81_to_fp16, dilations = var_100_dilations_0, groups = var_100_groups_0, pad = var_100_pad_0, pad_type = var_100_pad_type_0, strides = var_100_strides_0, weight = var_69_to_fp16, x = melspectrogram_features)[name = string("op_100_cast_fp16")];
+            string var_138_pad_type_0 = const()[name = string("op_138_pad_type_0"), val = string("custom")];
+            tensor<int32, [4]> var_138_pad_0 = const()[name = string("op_138_pad_0"), val = tensor<int32, [4]>([0, 0, 1, 1])];
+            tensor<int32, [2]> var_138_strides_0 = const()[name = string("op_138_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_138_dilations_0 = const()[name = string("op_138_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_138_groups_0 = const()[name = string("op_138_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 80, 1, 3]> op_113_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 80, 1, 3]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(370368))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(462592))))[name = string("op_113_to_fp16_palettized")];
+            tensor<fp16, [768]> var_119_to_fp16 = const()[name = string("op_119_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(462720)))];
+            tensor<fp16, [1, 768, 1, 3000]> var_138_cast_fp16 = conv(bias = var_119_to_fp16, dilations = var_138_dilations_0, groups = var_138_groups_0, pad = var_138_pad_0, pad_type = var_138_pad_type_0, strides = var_138_strides_0, weight = op_113_to_fp16_palettized, x = melspectrogram_features)[name = string("op_138_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 3000]> var_140_cast_fp16 = add(x = var_100_cast_fp16, y = var_138_cast_fp16)[name = string("op_140_cast_fp16")];
+            string hidden_states_1_mode_0 = const()[name = string("hidden_states_1_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 768, 1, 3000]> hidden_states_1_cast_fp16 = gelu(mode = hidden_states_1_mode_0, x = var_140_cast_fp16)[name = string("hidden_states_1_cast_fp16")];
+            string var_186_pad_type_0 = const()[name = string("op_186_pad_type_0"), val = string("custom")];
+            tensor<int32, [4]> var_186_pad_0 = const()[name = string("op_186_pad_0"), val = tensor<int32, [4]>([0, 0, 1, 1])];
+            tensor<int32, [2]> var_186_strides_0 = const()[name = string("op_186_strides_0"), val = tensor<int32, [2]>([2, 2])];
+            tensor<int32, [2]> var_186_dilations_0 = const()[name = string("op_186_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_186_groups_0 = const()[name = string("op_186_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 3]> var_155_to_fp16 = const()[name = string("op_155_to_fp16"), val = tensor<fp16, [768, 768, 1, 3]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(464320)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_186_cast_fp16 = conv(bias = var_81_to_fp16, dilations = var_186_dilations_0, groups = var_186_groups_0, pad = var_186_pad_0, pad_type = var_186_pad_type_0, strides = var_186_strides_0, weight = var_155_to_fp16, x = hidden_states_1_cast_fp16)[name = string("op_186_cast_fp16")];
+            string var_224_pad_type_0 = const()[name = string("op_224_pad_type_0"), val = string("custom")];
+            tensor<int32, [4]> var_224_pad_0 = const()[name = string("op_224_pad_0"), val = tensor<int32, [4]>([0, 0, 1, 1])];
+            tensor<int32, [2]> var_224_strides_0 = const()[name = string("op_224_strides_0"), val = tensor<int32, [2]>([2, 2])];
+            tensor<int32, [2]> var_224_dilations_0 = const()[name = string("op_224_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_224_groups_0 = const()[name = string("op_224_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 3]> op_199_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 3]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(4003328))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(4888128))))[name = string("op_199_to_fp16_palettized")];
+            tensor<fp16, [768]> var_205_to_fp16 = const()[name = string("op_205_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(4888256)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_224_cast_fp16 = conv(bias = var_205_to_fp16, dilations = var_224_dilations_0, groups = var_224_groups_0, pad = var_224_pad_0, pad_type = var_224_pad_type_0, strides = var_224_strides_0, weight = op_199_to_fp16_palettized, x = hidden_states_1_cast_fp16)[name = string("op_224_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> var_226_cast_fp16 = add(x = var_186_cast_fp16, y = var_224_cast_fp16)[name = string("op_226_cast_fp16")];
+            string hidden_states_3_mode_0 = const()[name = string("hidden_states_3_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 768, 1, 1500]> hidden_states_3_cast_fp16 = gelu(mode = hidden_states_3_mode_0, x = var_226_cast_fp16)[name = string("hidden_states_3_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> var_246_to_fp16 = const()[name = string("op_246_to_fp16"), val = tensor<fp16, [1, 768, 1, 1500]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(4889856)))];
+            tensor<fp16, [1, 768, 1, 1500]> inputs_1_cast_fp16 = add(x = hidden_states_3_cast_fp16, y = var_246_to_fp16)[name = string("inputs_1_cast_fp16")];
+            int32 var_260 = const()[name = string("op_260"), val = int32(3)];
+            tensor<int32, [1]> out_1_axes_0 = const()[name = string("out_1_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_279_to_fp16 = const()[name = string("op_279_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1500]> out_1_cast_fp16 = layer_norm(axes = out_1_axes_0, epsilon = var_279_to_fp16, x = inputs_1_cast_fp16)[name = string("out_1_cast_fp16")];
+            tensor<fp16, [768]> obj_1_variance_0_to_fp16 = const()[name = string("obj_1_variance_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(7193920)))];
+            tensor<fp16, [768]> obj_1_gamma_0_to_fp16 = const()[name = string("obj_1_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(7195520)))];
+            tensor<fp16, [768]> obj_1_beta_0_to_fp16 = const()[name = string("obj_1_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(7197120)))];
+            fp16 obj_1_epsilon_0_to_fp16 = const()[name = string("obj_1_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1500]> obj_1_cast_fp16 = batch_norm(beta = obj_1_beta_0_to_fp16, epsilon = obj_1_epsilon_0_to_fp16, gamma = obj_1_gamma_0_to_fp16, mean = var_81_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_1_cast_fp16)[name = string("obj_1_cast_fp16")];
+            string var_301_pad_type_0 = const()[name = string("op_301_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_301_strides_0 = const()[name = string("op_301_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_301_pad_0 = const()[name = string("op_301_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_301_dilations_0 = const()[name = string("op_301_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_301_groups_0 = const()[name = string("op_301_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_0_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(7198720))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(7493696))))[name = string("layers_0_self_attn_q_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_0_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_0_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(7493824)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_301_cast_fp16 = conv(bias = layers_0_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_301_dilations_0, groups = var_301_groups_0, pad = var_301_pad_0, pad_type = var_301_pad_type_0, strides = var_301_strides_0, weight = layers_0_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_1_cast_fp16)[name = string("op_301_cast_fp16")];
+            string var_307_pad_type_0 = const()[name = string("op_307_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_307_strides_0 = const()[name = string("op_307_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_307_pad_0 = const()[name = string("op_307_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_307_dilations_0 = const()[name = string("op_307_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_307_groups_0 = const()[name = string("op_307_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_0_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(7520576))), nonzero_data = tensor<fp16, [12535]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(7495424))))[name = string("layers_0_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1500]> var_307_cast_fp16 = conv(dilations = var_307_dilations_0, groups = var_307_groups_0, pad = var_307_pad_0, pad_type = var_307_pad_type_0, strides = var_307_strides_0, weight = layers_0_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_1_cast_fp16)[name = string("op_307_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> query_1_cast_fp16 = add(x = var_301_cast_fp16, y = var_307_cast_fp16)[name = string("query_1_cast_fp16")];
+            string var_316_pad_type_0 = const()[name = string("op_316_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_316_strides_0 = const()[name = string("op_316_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_316_pad_0 = const()[name = string("op_316_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_316_dilations_0 = const()[name = string("op_316_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_316_groups_0 = const()[name = string("op_316_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_0_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(7594368))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(7889344))))[name = string("layers_0_self_attn_k_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1, 768, 1, 1500]> var_316_cast_fp16 = conv(dilations = var_316_dilations_0, groups = var_316_groups_0, pad = var_316_pad_0, pad_type = var_316_pad_type_0, strides = var_316_strides_0, weight = layers_0_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_1_cast_fp16)[name = string("op_316_cast_fp16")];
+            string var_322_pad_type_0 = const()[name = string("op_322_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_322_strides_0 = const()[name = string("op_322_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_322_pad_0 = const()[name = string("op_322_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_322_dilations_0 = const()[name = string("op_322_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_322_groups_0 = const()[name = string("op_322_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_0_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(7913600))), nonzero_data = tensor<fp16, [12023]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(7889472))))[name = string("layers_0_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1500]> var_322_cast_fp16 = conv(dilations = var_322_dilations_0, groups = var_322_groups_0, pad = var_322_pad_0, pad_type = var_322_pad_type_0, strides = var_322_strides_0, weight = layers_0_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_1_cast_fp16)[name = string("op_322_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> key_1_cast_fp16 = add(x = var_316_cast_fp16, y = var_322_cast_fp16)[name = string("key_1_cast_fp16")];
+            string var_332_pad_type_0 = const()[name = string("op_332_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_332_strides_0 = const()[name = string("op_332_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_332_pad_0 = const()[name = string("op_332_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_332_dilations_0 = const()[name = string("op_332_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_332_groups_0 = const()[name = string("op_332_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_0_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(7987392))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(8282368))))[name = string("layers_0_self_attn_v_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_0_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_0_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(8282496)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_332_cast_fp16 = conv(bias = layers_0_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_332_dilations_0, groups = var_332_groups_0, pad = var_332_pad_0, pad_type = var_332_pad_type_0, strides = var_332_strides_0, weight = layers_0_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_1_cast_fp16)[name = string("op_332_cast_fp16")];
+            string var_338_pad_type_0 = const()[name = string("op_338_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_338_strides_0 = const()[name = string("op_338_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_338_pad_0 = const()[name = string("op_338_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_338_dilations_0 = const()[name = string("op_338_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_338_groups_0 = const()[name = string("op_338_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_0_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(8308864))), nonzero_data = tensor<fp16, [12322]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(8284096))))[name = string("layers_0_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1500]> var_338_cast_fp16 = conv(dilations = var_338_dilations_0, groups = var_338_groups_0, pad = var_338_pad_0, pad_type = var_338_pad_type_0, strides = var_338_strides_0, weight = layers_0_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_1_cast_fp16)[name = string("op_338_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> value_1_cast_fp16 = add(x = var_332_cast_fp16, y = var_338_cast_fp16)[name = string("value_1_cast_fp16")];
+            tensor<int32, [4]> var_341 = const()[name = string("op_341"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1500]> mh_q_1_cast_fp16 = reshape(shape = var_341, x = query_1_cast_fp16)[name = string("mh_q_1_cast_fp16")];
+            fp16 var_343_to_fp16 = const()[name = string("op_343_to_fp16"), val = fp16(0x1p-3)];
+            tensor<fp16, [1, 12, 64, 1500]> var_344_cast_fp16 = mul(x = mh_q_1_cast_fp16, y = var_343_to_fp16)[name = string("op_344_cast_fp16")];
+            tensor<int32, [4]> var_345 = const()[name = string("op_345"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1500]> var_346_cast_fp16 = reshape(shape = var_345, x = key_1_cast_fp16)[name = string("op_346_cast_fp16")];
+            bool mh_w_1_transpose_x_0 = const()[name = string("mh_w_1_transpose_x_0"), val = bool(true)];
+            bool mh_w_1_transpose_y_0 = const()[name = string("mh_w_1_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 12, 1500, 1500]> mh_w_1_cast_fp16 = matmul(transpose_x = mh_w_1_transpose_x_0, transpose_y = mh_w_1_transpose_y_0, x = var_344_cast_fp16, y = var_346_cast_fp16)[name = string("mh_w_1_cast_fp16")];
+            tensor<fp16, [1, 12, 1500, 1500]> var_349_cast_fp16 = softmax(axis = var_260, x = mh_w_1_cast_fp16)[name = string("op_349_cast_fp16")];
+            tensor<int32, [4]> var_350 = const()[name = string("op_350"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1500]> var_351_cast_fp16 = reshape(shape = var_350, x = value_1_cast_fp16)[name = string("op_351_cast_fp16")];
+            bool attn_1_transpose_x_0 = const()[name = string("attn_1_transpose_x_0"), val = bool(false)];
+            bool attn_1_transpose_y_0 = const()[name = string("attn_1_transpose_y_0"), val = bool(true)];
+            tensor<fp16, [1, 12, 64, 1500]> attn_1_cast_fp16 = matmul(transpose_x = attn_1_transpose_x_0, transpose_y = attn_1_transpose_y_0, x = var_351_cast_fp16, y = var_349_cast_fp16)[name = string("attn_1_cast_fp16")];
+            tensor<int32, [4]> var_354 = const()[name = string("op_354"), val = tensor<int32, [4]>([1, 768, 1, -1])];
+            tensor<fp16, [1, 768, 1, 1500]> input_1_cast_fp16 = reshape(shape = var_354, x = attn_1_cast_fp16)[name = string("input_1_cast_fp16")];
+            string var_364_pad_type_0 = const()[name = string("op_364_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_364_strides_0 = const()[name = string("op_364_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_364_pad_0 = const()[name = string("op_364_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_364_dilations_0 = const()[name = string("op_364_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_364_groups_0 = const()[name = string("op_364_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_0_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(8382656))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(8677632))))[name = string("layers_0_self_attn_o_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_0_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_0_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(8677760)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_364_cast_fp16 = conv(bias = layers_0_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_364_dilations_0, groups = var_364_groups_0, pad = var_364_pad_0, pad_type = var_364_pad_type_0, strides = var_364_strides_0, weight = layers_0_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_1_cast_fp16)[name = string("op_364_cast_fp16")];
+            string var_370_pad_type_0 = const()[name = string("op_370_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_370_strides_0 = const()[name = string("op_370_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_370_pad_0 = const()[name = string("op_370_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_370_dilations_0 = const()[name = string("op_370_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_370_groups_0 = const()[name = string("op_370_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_0_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(8699776))), nonzero_data = tensor<fp16, [10175]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(8679360))))[name = string("layers_0_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1500]> var_370_cast_fp16 = conv(dilations = var_370_dilations_0, groups = var_370_groups_0, pad = var_370_pad_0, pad_type = var_370_pad_type_0, strides = var_370_strides_0, weight = layers_0_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_1_cast_fp16)[name = string("op_370_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> obj_3_cast_fp16 = add(x = var_364_cast_fp16, y = var_370_cast_fp16)[name = string("obj_3_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> inputs_3_cast_fp16 = add(x = inputs_1_cast_fp16, y = obj_3_cast_fp16)[name = string("inputs_3_cast_fp16")];
+            tensor<int32, [1]> out_3_axes_0 = const()[name = string("out_3_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_381_to_fp16 = const()[name = string("op_381_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1500]> out_3_cast_fp16 = layer_norm(axes = out_3_axes_0, epsilon = var_381_to_fp16, x = inputs_3_cast_fp16)[name = string("out_3_cast_fp16")];
+            tensor<fp16, [768]> input_3_gamma_0_to_fp16 = const()[name = string("input_3_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(8773568)))];
+            tensor<fp16, [768]> input_3_beta_0_to_fp16 = const()[name = string("input_3_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(8775168)))];
+            fp16 input_3_epsilon_0_to_fp16 = const()[name = string("input_3_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1500]> input_3_cast_fp16 = batch_norm(beta = input_3_beta_0_to_fp16, epsilon = input_3_epsilon_0_to_fp16, gamma = input_3_gamma_0_to_fp16, mean = var_81_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_3_cast_fp16)[name = string("input_3_cast_fp16")];
+            string var_399_pad_type_0 = const()[name = string("op_399_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_399_strides_0 = const()[name = string("op_399_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_399_pad_0 = const()[name = string("op_399_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_399_dilations_0 = const()[name = string("op_399_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_399_groups_0 = const()[name = string("op_399_groups_0"), val = int32(1)];
+            tensor<fp16, [3072, 768, 1, 1]> layers_0_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [3072, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(8776768))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(9956480))))[name = string("layers_0_fc1_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [3072]> layers_0_fc1_inlier_module_bias_to_fp16 = const()[name = string("layers_0_fc1_inlier_module_bias_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(9956608)))];
+            tensor<fp16, [1, 3072, 1, 1500]> var_399_cast_fp16 = conv(bias = layers_0_fc1_inlier_module_bias_to_fp16, dilations = var_399_dilations_0, groups = var_399_groups_0, pad = var_399_pad_0, pad_type = var_399_pad_type_0, strides = var_399_strides_0, weight = layers_0_fc1_inlier_module_weight_to_fp16_palettized, x = input_3_cast_fp16)[name = string("op_399_cast_fp16")];
+            string var_405_pad_type_0 = const()[name = string("op_405_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_405_strides_0 = const()[name = string("op_405_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_405_pad_0 = const()[name = string("op_405_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_405_dilations_0 = const()[name = string("op_405_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_405_groups_0 = const()[name = string("op_405_groups_0"), val = int32(1)];
+            tensor<fp16, [3072, 768, 1, 1]> layers_0_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [3072, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(10057408))), nonzero_data = tensor<fp16, [47244]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(9962816))))[name = string("layers_0_fc1_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 3072, 1, 1500]> var_405_cast_fp16 = conv(dilations = var_405_dilations_0, groups = var_405_groups_0, pad = var_405_pad_0, pad_type = var_405_pad_type_0, strides = var_405_strides_0, weight = layers_0_fc1_outlier_module_weight_to_fp16_sparsified, x = input_3_cast_fp16)[name = string("op_405_cast_fp16")];
+            tensor<fp16, [1, 3072, 1, 1500]> input_5_cast_fp16 = add(x = var_399_cast_fp16, y = var_405_cast_fp16)[name = string("input_5_cast_fp16")];
+            string input_7_mode_0 = const()[name = string("input_7_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 3072, 1, 1500]> input_7_cast_fp16 = gelu(mode = input_7_mode_0, x = input_5_cast_fp16)[name = string("input_7_cast_fp16")];
+            string var_416_pad_type_0 = const()[name = string("op_416_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_416_strides_0 = const()[name = string("op_416_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_416_pad_0 = const()[name = string("op_416_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_416_dilations_0 = const()[name = string("op_416_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_416_groups_0 = const()[name = string("op_416_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 3072, 1, 1]> layers_0_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(10352384))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(11532096))))[name = string("layers_0_fc2_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_0_fc2_inlier_module_bias_to_fp16 = const()[name = string("layers_0_fc2_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(11532224)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_416_cast_fp16 = conv(bias = layers_0_fc2_inlier_module_bias_to_fp16, dilations = var_416_dilations_0, groups = var_416_groups_0, pad = var_416_pad_0, pad_type = var_416_pad_type_0, strides = var_416_strides_0, weight = layers_0_fc2_inlier_module_weight_to_fp16_palettized, x = input_7_cast_fp16)[name = string("op_416_cast_fp16")];
+            string var_422_pad_type_0 = const()[name = string("op_422_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_422_strides_0 = const()[name = string("op_422_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_422_pad_0 = const()[name = string("op_422_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_422_dilations_0 = const()[name = string("op_422_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_422_groups_0 = const()[name = string("op_422_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 3072, 1, 1]> layers_0_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(11613568))), nonzero_data = tensor<fp16, [39820]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(11533824))))[name = string("layers_0_fc2_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1500]> var_422_cast_fp16 = conv(dilations = var_422_dilations_0, groups = var_422_groups_0, pad = var_422_pad_0, pad_type = var_422_pad_type_0, strides = var_422_strides_0, weight = layers_0_fc2_outlier_module_weight_to_fp16_sparsified, x = input_7_cast_fp16)[name = string("op_422_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> hidden_states_5_cast_fp16 = add(x = var_416_cast_fp16, y = var_422_cast_fp16)[name = string("hidden_states_5_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> inputs_5_cast_fp16 = add(x = inputs_3_cast_fp16, y = hidden_states_5_cast_fp16)[name = string("inputs_5_cast_fp16")];
+            int32 var_432 = const()[name = string("op_432"), val = int32(3)];
+            tensor<int32, [1]> out_5_axes_0 = const()[name = string("out_5_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_451_to_fp16 = const()[name = string("op_451_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1500]> out_5_cast_fp16 = layer_norm(axes = out_5_axes_0, epsilon = var_451_to_fp16, x = inputs_5_cast_fp16)[name = string("out_5_cast_fp16")];
+            tensor<fp16, [768]> obj_5_gamma_0_to_fp16 = const()[name = string("obj_5_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(11908544)))];
+            tensor<fp16, [768]> obj_5_beta_0_to_fp16 = const()[name = string("obj_5_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(11910144)))];
+            fp16 obj_5_epsilon_0_to_fp16 = const()[name = string("obj_5_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1500]> obj_5_cast_fp16 = batch_norm(beta = obj_5_beta_0_to_fp16, epsilon = obj_5_epsilon_0_to_fp16, gamma = obj_5_gamma_0_to_fp16, mean = var_81_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_5_cast_fp16)[name = string("obj_5_cast_fp16")];
+            string var_473_pad_type_0 = const()[name = string("op_473_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_473_strides_0 = const()[name = string("op_473_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_473_pad_0 = const()[name = string("op_473_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_473_dilations_0 = const()[name = string("op_473_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_473_groups_0 = const()[name = string("op_473_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_1_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(11911744))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(12206720))))[name = string("layers_1_self_attn_q_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_1_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_1_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(12206848)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_473_cast_fp16 = conv(bias = layers_1_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_473_dilations_0, groups = var_473_groups_0, pad = var_473_pad_0, pad_type = var_473_pad_type_0, strides = var_473_strides_0, weight = layers_1_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_5_cast_fp16)[name = string("op_473_cast_fp16")];
+            string var_479_pad_type_0 = const()[name = string("op_479_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_479_strides_0 = const()[name = string("op_479_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_479_pad_0 = const()[name = string("op_479_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_479_dilations_0 = const()[name = string("op_479_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_479_groups_0 = const()[name = string("op_479_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_1_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(12226688))), nonzero_data = tensor<fp16, [9080]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(12208448))))[name = string("layers_1_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1500]> var_479_cast_fp16 = conv(dilations = var_479_dilations_0, groups = var_479_groups_0, pad = var_479_pad_0, pad_type = var_479_pad_type_0, strides = var_479_strides_0, weight = layers_1_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_5_cast_fp16)[name = string("op_479_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> query_3_cast_fp16 = add(x = var_473_cast_fp16, y = var_479_cast_fp16)[name = string("query_3_cast_fp16")];
+            string var_488_pad_type_0 = const()[name = string("op_488_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_488_strides_0 = const()[name = string("op_488_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_488_pad_0 = const()[name = string("op_488_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_488_dilations_0 = const()[name = string("op_488_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_488_groups_0 = const()[name = string("op_488_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_1_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(12300480))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(12595456))))[name = string("layers_1_self_attn_k_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1, 768, 1, 1500]> var_488_cast_fp16 = conv(dilations = var_488_dilations_0, groups = var_488_groups_0, pad = var_488_pad_0, pad_type = var_488_pad_type_0, strides = var_488_strides_0, weight = layers_1_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_5_cast_fp16)[name = string("op_488_cast_fp16")];
+            string var_494_pad_type_0 = const()[name = string("op_494_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_494_strides_0 = const()[name = string("op_494_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_494_pad_0 = const()[name = string("op_494_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_494_dilations_0 = const()[name = string("op_494_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_494_groups_0 = const()[name = string("op_494_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_1_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(12614464))), nonzero_data = tensor<fp16, [9397]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(12595584))))[name = string("layers_1_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1500]> var_494_cast_fp16 = conv(dilations = var_494_dilations_0, groups = var_494_groups_0, pad = var_494_pad_0, pad_type = var_494_pad_type_0, strides = var_494_strides_0, weight = layers_1_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_5_cast_fp16)[name = string("op_494_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> key_3_cast_fp16 = add(x = var_488_cast_fp16, y = var_494_cast_fp16)[name = string("key_3_cast_fp16")];
+            string var_504_pad_type_0 = const()[name = string("op_504_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_504_strides_0 = const()[name = string("op_504_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_504_pad_0 = const()[name = string("op_504_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_504_dilations_0 = const()[name = string("op_504_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_504_groups_0 = const()[name = string("op_504_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_1_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(12688256))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(12983232))))[name = string("layers_1_self_attn_v_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_1_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_1_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(12983360)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_504_cast_fp16 = conv(bias = layers_1_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_504_dilations_0, groups = var_504_groups_0, pad = var_504_pad_0, pad_type = var_504_pad_type_0, strides = var_504_strides_0, weight = layers_1_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_5_cast_fp16)[name = string("op_504_cast_fp16")];
+            string var_510_pad_type_0 = const()[name = string("op_510_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_510_strides_0 = const()[name = string("op_510_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_510_pad_0 = const()[name = string("op_510_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_510_dilations_0 = const()[name = string("op_510_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_510_groups_0 = const()[name = string("op_510_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_1_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(13000768))), nonzero_data = tensor<fp16, [7866]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(12984960))))[name = string("layers_1_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1500]> var_510_cast_fp16 = conv(dilations = var_510_dilations_0, groups = var_510_groups_0, pad = var_510_pad_0, pad_type = var_510_pad_type_0, strides = var_510_strides_0, weight = layers_1_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_5_cast_fp16)[name = string("op_510_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> value_3_cast_fp16 = add(x = var_504_cast_fp16, y = var_510_cast_fp16)[name = string("value_3_cast_fp16")];
+            tensor<int32, [4]> var_513 = const()[name = string("op_513"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1500]> mh_q_3_cast_fp16 = reshape(shape = var_513, x = query_3_cast_fp16)[name = string("mh_q_3_cast_fp16")];
+            fp16 var_515_to_fp16 = const()[name = string("op_515_to_fp16"), val = fp16(0x1p-3)];
+            tensor<fp16, [1, 12, 64, 1500]> var_516_cast_fp16 = mul(x = mh_q_3_cast_fp16, y = var_515_to_fp16)[name = string("op_516_cast_fp16")];
+            tensor<int32, [4]> var_517 = const()[name = string("op_517"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1500]> var_518_cast_fp16 = reshape(shape = var_517, x = key_3_cast_fp16)[name = string("op_518_cast_fp16")];
+            bool mh_w_3_transpose_x_0 = const()[name = string("mh_w_3_transpose_x_0"), val = bool(true)];
+            bool mh_w_3_transpose_y_0 = const()[name = string("mh_w_3_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 12, 1500, 1500]> mh_w_3_cast_fp16 = matmul(transpose_x = mh_w_3_transpose_x_0, transpose_y = mh_w_3_transpose_y_0, x = var_516_cast_fp16, y = var_518_cast_fp16)[name = string("mh_w_3_cast_fp16")];
+            tensor<fp16, [1, 12, 1500, 1500]> var_521_cast_fp16 = softmax(axis = var_432, x = mh_w_3_cast_fp16)[name = string("op_521_cast_fp16")];
+            tensor<int32, [4]> var_522 = const()[name = string("op_522"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1500]> var_523_cast_fp16 = reshape(shape = var_522, x = value_3_cast_fp16)[name = string("op_523_cast_fp16")];
+            bool attn_3_transpose_x_0 = const()[name = string("attn_3_transpose_x_0"), val = bool(false)];
+            bool attn_3_transpose_y_0 = const()[name = string("attn_3_transpose_y_0"), val = bool(true)];
+            tensor<fp16, [1, 12, 64, 1500]> attn_3_cast_fp16 = matmul(transpose_x = attn_3_transpose_x_0, transpose_y = attn_3_transpose_y_0, x = var_523_cast_fp16, y = var_521_cast_fp16)[name = string("attn_3_cast_fp16")];
+            tensor<int32, [4]> var_526 = const()[name = string("op_526"), val = tensor<int32, [4]>([1, 768, 1, -1])];
+            tensor<fp16, [1, 768, 1, 1500]> input_9_cast_fp16 = reshape(shape = var_526, x = attn_3_cast_fp16)[name = string("input_9_cast_fp16")];
+            string var_536_pad_type_0 = const()[name = string("op_536_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_536_strides_0 = const()[name = string("op_536_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_536_pad_0 = const()[name = string("op_536_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_536_dilations_0 = const()[name = string("op_536_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_536_groups_0 = const()[name = string("op_536_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_1_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(13074560))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(13369536))))[name = string("layers_1_self_attn_o_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_1_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_1_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(13369664)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_536_cast_fp16 = conv(bias = layers_1_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_536_dilations_0, groups = var_536_groups_0, pad = var_536_pad_0, pad_type = var_536_pad_type_0, strides = var_536_strides_0, weight = layers_1_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_9_cast_fp16)[name = string("op_536_cast_fp16")];
+            string var_542_pad_type_0 = const()[name = string("op_542_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_542_strides_0 = const()[name = string("op_542_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_542_pad_0 = const()[name = string("op_542_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_542_dilations_0 = const()[name = string("op_542_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_542_groups_0 = const()[name = string("op_542_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_1_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(13384576))), nonzero_data = tensor<fp16, [6617]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(13371264))))[name = string("layers_1_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1500]> var_542_cast_fp16 = conv(dilations = var_542_dilations_0, groups = var_542_groups_0, pad = var_542_pad_0, pad_type = var_542_pad_type_0, strides = var_542_strides_0, weight = layers_1_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_9_cast_fp16)[name = string("op_542_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> obj_7_cast_fp16 = add(x = var_536_cast_fp16, y = var_542_cast_fp16)[name = string("obj_7_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> inputs_7_cast_fp16 = add(x = inputs_5_cast_fp16, y = obj_7_cast_fp16)[name = string("inputs_7_cast_fp16")];
+            tensor<int32, [1]> out_7_axes_0 = const()[name = string("out_7_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_553_to_fp16 = const()[name = string("op_553_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1500]> out_7_cast_fp16 = layer_norm(axes = out_7_axes_0, epsilon = var_553_to_fp16, x = inputs_7_cast_fp16)[name = string("out_7_cast_fp16")];
+            tensor<fp16, [768]> input_11_gamma_0_to_fp16 = const()[name = string("input_11_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(13458368)))];
+            tensor<fp16, [768]> input_11_beta_0_to_fp16 = const()[name = string("input_11_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(13459968)))];
+            fp16 input_11_epsilon_0_to_fp16 = const()[name = string("input_11_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1500]> input_11_cast_fp16 = batch_norm(beta = input_11_beta_0_to_fp16, epsilon = input_11_epsilon_0_to_fp16, gamma = input_11_gamma_0_to_fp16, mean = var_81_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_7_cast_fp16)[name = string("input_11_cast_fp16")];
+            string var_571_pad_type_0 = const()[name = string("op_571_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_571_strides_0 = const()[name = string("op_571_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_571_pad_0 = const()[name = string("op_571_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_571_dilations_0 = const()[name = string("op_571_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_571_groups_0 = const()[name = string("op_571_groups_0"), val = int32(1)];
+            tensor<fp16, [3072, 768, 1, 1]> layers_1_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [3072, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(13461568))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(14641280))))[name = string("layers_1_fc1_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [3072]> layers_1_fc1_inlier_module_bias_to_fp16 = const()[name = string("layers_1_fc1_inlier_module_bias_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(14641408)))];
+            tensor<fp16, [1, 3072, 1, 1500]> var_571_cast_fp16 = conv(bias = layers_1_fc1_inlier_module_bias_to_fp16, dilations = var_571_dilations_0, groups = var_571_groups_0, pad = var_571_pad_0, pad_type = var_571_pad_type_0, strides = var_571_strides_0, weight = layers_1_fc1_inlier_module_weight_to_fp16_palettized, x = input_11_cast_fp16)[name = string("op_571_cast_fp16")];
+            string var_577_pad_type_0 = const()[name = string("op_577_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_577_strides_0 = const()[name = string("op_577_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_577_pad_0 = const()[name = string("op_577_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_577_dilations_0 = const()[name = string("op_577_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_577_groups_0 = const()[name = string("op_577_groups_0"), val = int32(1)];
+            tensor<fp16, [3072, 768, 1, 1]> layers_1_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [3072, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(14721024))), nonzero_data = tensor<fp16, [36655]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(14647616))))[name = string("layers_1_fc1_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 3072, 1, 1500]> var_577_cast_fp16 = conv(dilations = var_577_dilations_0, groups = var_577_groups_0, pad = var_577_pad_0, pad_type = var_577_pad_type_0, strides = var_577_strides_0, weight = layers_1_fc1_outlier_module_weight_to_fp16_sparsified, x = input_11_cast_fp16)[name = string("op_577_cast_fp16")];
+            tensor<fp16, [1, 3072, 1, 1500]> input_13_cast_fp16 = add(x = var_571_cast_fp16, y = var_577_cast_fp16)[name = string("input_13_cast_fp16")];
+            string input_15_mode_0 = const()[name = string("input_15_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 3072, 1, 1500]> input_15_cast_fp16 = gelu(mode = input_15_mode_0, x = input_13_cast_fp16)[name = string("input_15_cast_fp16")];
+            string var_588_pad_type_0 = const()[name = string("op_588_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_588_strides_0 = const()[name = string("op_588_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_588_pad_0 = const()[name = string("op_588_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_588_dilations_0 = const()[name = string("op_588_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_588_groups_0 = const()[name = string("op_588_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 3072, 1, 1]> layers_1_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(15016000))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(16195712))))[name = string("layers_1_fc2_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_1_fc2_inlier_module_bias_to_fp16 = const()[name = string("layers_1_fc2_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(16195840)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_588_cast_fp16 = conv(bias = layers_1_fc2_inlier_module_bias_to_fp16, dilations = var_588_dilations_0, groups = var_588_groups_0, pad = var_588_pad_0, pad_type = var_588_pad_type_0, strides = var_588_strides_0, weight = layers_1_fc2_inlier_module_weight_to_fp16_palettized, x = input_15_cast_fp16)[name = string("op_588_cast_fp16")];
+            string var_594_pad_type_0 = const()[name = string("op_594_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_594_strides_0 = const()[name = string("op_594_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_594_pad_0 = const()[name = string("op_594_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_594_dilations_0 = const()[name = string("op_594_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_594_groups_0 = const()[name = string("op_594_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 3072, 1, 1]> layers_1_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(16268160))), nonzero_data = tensor<fp16, [35313]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(16197440))))[name = string("layers_1_fc2_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1500]> var_594_cast_fp16 = conv(dilations = var_594_dilations_0, groups = var_594_groups_0, pad = var_594_pad_0, pad_type = var_594_pad_type_0, strides = var_594_strides_0, weight = layers_1_fc2_outlier_module_weight_to_fp16_sparsified, x = input_15_cast_fp16)[name = string("op_594_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> hidden_states_7_cast_fp16 = add(x = var_588_cast_fp16, y = var_594_cast_fp16)[name = string("hidden_states_7_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> inputs_9_cast_fp16 = add(x = inputs_7_cast_fp16, y = hidden_states_7_cast_fp16)[name = string("inputs_9_cast_fp16")];
+            int32 var_604 = const()[name = string("op_604"), val = int32(3)];
+            tensor<int32, [1]> out_9_axes_0 = const()[name = string("out_9_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_623_to_fp16 = const()[name = string("op_623_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1500]> out_9_cast_fp16 = layer_norm(axes = out_9_axes_0, epsilon = var_623_to_fp16, x = inputs_9_cast_fp16)[name = string("out_9_cast_fp16")];
+            tensor<fp16, [768]> obj_9_gamma_0_to_fp16 = const()[name = string("obj_9_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(16563136)))];
+            tensor<fp16, [768]> obj_9_beta_0_to_fp16 = const()[name = string("obj_9_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(16564736)))];
+            fp16 obj_9_epsilon_0_to_fp16 = const()[name = string("obj_9_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1500]> obj_9_cast_fp16 = batch_norm(beta = obj_9_beta_0_to_fp16, epsilon = obj_9_epsilon_0_to_fp16, gamma = obj_9_gamma_0_to_fp16, mean = var_81_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_9_cast_fp16)[name = string("obj_9_cast_fp16")];
+            string var_645_pad_type_0 = const()[name = string("op_645_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_645_strides_0 = const()[name = string("op_645_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_645_pad_0 = const()[name = string("op_645_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_645_dilations_0 = const()[name = string("op_645_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_645_groups_0 = const()[name = string("op_645_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_2_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(16566336))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(16861312))))[name = string("layers_2_self_attn_q_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_2_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_2_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(16861440)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_645_cast_fp16 = conv(bias = layers_2_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_645_dilations_0, groups = var_645_groups_0, pad = var_645_pad_0, pad_type = var_645_pad_type_0, strides = var_645_strides_0, weight = layers_2_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_9_cast_fp16)[name = string("op_645_cast_fp16")];
+            string var_651_pad_type_0 = const()[name = string("op_651_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_651_strides_0 = const()[name = string("op_651_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_651_pad_0 = const()[name = string("op_651_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_651_dilations_0 = const()[name = string("op_651_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_651_groups_0 = const()[name = string("op_651_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_2_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(16878784))), nonzero_data = tensor<fp16, [7822]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(16863040))))[name = string("layers_2_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1500]> var_651_cast_fp16 = conv(dilations = var_651_dilations_0, groups = var_651_groups_0, pad = var_651_pad_0, pad_type = var_651_pad_type_0, strides = var_651_strides_0, weight = layers_2_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_9_cast_fp16)[name = string("op_651_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> query_5_cast_fp16 = add(x = var_645_cast_fp16, y = var_651_cast_fp16)[name = string("query_5_cast_fp16")];
+            string var_660_pad_type_0 = const()[name = string("op_660_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_660_strides_0 = const()[name = string("op_660_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_660_pad_0 = const()[name = string("op_660_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_660_dilations_0 = const()[name = string("op_660_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_660_groups_0 = const()[name = string("op_660_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_2_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(16952576))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(17247552))))[name = string("layers_2_self_attn_k_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1, 768, 1, 1500]> var_660_cast_fp16 = conv(dilations = var_660_dilations_0, groups = var_660_groups_0, pad = var_660_pad_0, pad_type = var_660_pad_type_0, strides = var_660_strides_0, weight = layers_2_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_9_cast_fp16)[name = string("op_660_cast_fp16")];
+            string var_666_pad_type_0 = const()[name = string("op_666_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_666_strides_0 = const()[name = string("op_666_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_666_pad_0 = const()[name = string("op_666_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_666_dilations_0 = const()[name = string("op_666_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_666_groups_0 = const()[name = string("op_666_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_2_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(17262400))), nonzero_data = tensor<fp16, [7313]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(17247680))))[name = string("layers_2_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1500]> var_666_cast_fp16 = conv(dilations = var_666_dilations_0, groups = var_666_groups_0, pad = var_666_pad_0, pad_type = var_666_pad_type_0, strides = var_666_strides_0, weight = layers_2_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_9_cast_fp16)[name = string("op_666_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> key_5_cast_fp16 = add(x = var_660_cast_fp16, y = var_666_cast_fp16)[name = string("key_5_cast_fp16")];
+            string var_676_pad_type_0 = const()[name = string("op_676_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_676_strides_0 = const()[name = string("op_676_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_676_pad_0 = const()[name = string("op_676_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_676_dilations_0 = const()[name = string("op_676_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_676_groups_0 = const()[name = string("op_676_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_2_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(17336192))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(17631168))))[name = string("layers_2_self_attn_v_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_2_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_2_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(17631296)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_676_cast_fp16 = conv(bias = layers_2_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_676_dilations_0, groups = var_676_groups_0, pad = var_676_pad_0, pad_type = var_676_pad_type_0, strides = var_676_strides_0, weight = layers_2_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_9_cast_fp16)[name = string("op_676_cast_fp16")];
+            string var_682_pad_type_0 = const()[name = string("op_682_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_682_strides_0 = const()[name = string("op_682_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_682_pad_0 = const()[name = string("op_682_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_682_dilations_0 = const()[name = string("op_682_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_682_groups_0 = const()[name = string("op_682_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_2_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(17644992))), nonzero_data = tensor<fp16, [5996]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(17632896))))[name = string("layers_2_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1500]> var_682_cast_fp16 = conv(dilations = var_682_dilations_0, groups = var_682_groups_0, pad = var_682_pad_0, pad_type = var_682_pad_type_0, strides = var_682_strides_0, weight = layers_2_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_9_cast_fp16)[name = string("op_682_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> value_5_cast_fp16 = add(x = var_676_cast_fp16, y = var_682_cast_fp16)[name = string("value_5_cast_fp16")];
+            tensor<int32, [4]> var_685 = const()[name = string("op_685"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1500]> mh_q_5_cast_fp16 = reshape(shape = var_685, x = query_5_cast_fp16)[name = string("mh_q_5_cast_fp16")];
+            fp16 var_687_to_fp16 = const()[name = string("op_687_to_fp16"), val = fp16(0x1p-3)];
+            tensor<fp16, [1, 12, 64, 1500]> var_688_cast_fp16 = mul(x = mh_q_5_cast_fp16, y = var_687_to_fp16)[name = string("op_688_cast_fp16")];
+            tensor<int32, [4]> var_689 = const()[name = string("op_689"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1500]> var_690_cast_fp16 = reshape(shape = var_689, x = key_5_cast_fp16)[name = string("op_690_cast_fp16")];
+            bool mh_w_5_transpose_x_0 = const()[name = string("mh_w_5_transpose_x_0"), val = bool(true)];
+            bool mh_w_5_transpose_y_0 = const()[name = string("mh_w_5_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 12, 1500, 1500]> mh_w_5_cast_fp16 = matmul(transpose_x = mh_w_5_transpose_x_0, transpose_y = mh_w_5_transpose_y_0, x = var_688_cast_fp16, y = var_690_cast_fp16)[name = string("mh_w_5_cast_fp16")];
+            tensor<fp16, [1, 12, 1500, 1500]> var_693_cast_fp16 = softmax(axis = var_604, x = mh_w_5_cast_fp16)[name = string("op_693_cast_fp16")];
+            tensor<int32, [4]> var_694 = const()[name = string("op_694"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1500]> var_695_cast_fp16 = reshape(shape = var_694, x = value_5_cast_fp16)[name = string("op_695_cast_fp16")];
+            bool attn_5_transpose_x_0 = const()[name = string("attn_5_transpose_x_0"), val = bool(false)];
+            bool attn_5_transpose_y_0 = const()[name = string("attn_5_transpose_y_0"), val = bool(true)];
+            tensor<fp16, [1, 12, 64, 1500]> attn_5_cast_fp16 = matmul(transpose_x = attn_5_transpose_x_0, transpose_y = attn_5_transpose_y_0, x = var_695_cast_fp16, y = var_693_cast_fp16)[name = string("attn_5_cast_fp16")];
+            tensor<int32, [4]> var_698 = const()[name = string("op_698"), val = tensor<int32, [4]>([1, 768, 1, -1])];
+            tensor<fp16, [1, 768, 1, 1500]> input_17_cast_fp16 = reshape(shape = var_698, x = attn_5_cast_fp16)[name = string("input_17_cast_fp16")];
+            string var_708_pad_type_0 = const()[name = string("op_708_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_708_strides_0 = const()[name = string("op_708_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_708_pad_0 = const()[name = string("op_708_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_708_dilations_0 = const()[name = string("op_708_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_708_groups_0 = const()[name = string("op_708_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_2_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(17718784))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(18013760))))[name = string("layers_2_self_attn_o_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_2_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_2_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(18013888)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_708_cast_fp16 = conv(bias = layers_2_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_708_dilations_0, groups = var_708_groups_0, pad = var_708_pad_0, pad_type = var_708_pad_type_0, strides = var_708_strides_0, weight = layers_2_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_17_cast_fp16)[name = string("op_708_cast_fp16")];
+            string var_714_pad_type_0 = const()[name = string("op_714_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_714_strides_0 = const()[name = string("op_714_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_714_pad_0 = const()[name = string("op_714_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_714_dilations_0 = const()[name = string("op_714_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_714_groups_0 = const()[name = string("op_714_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_2_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(18025408))), nonzero_data = tensor<fp16, [4911]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(18015488))))[name = string("layers_2_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1500]> var_714_cast_fp16 = conv(dilations = var_714_dilations_0, groups = var_714_groups_0, pad = var_714_pad_0, pad_type = var_714_pad_type_0, strides = var_714_strides_0, weight = layers_2_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_17_cast_fp16)[name = string("op_714_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> obj_11_cast_fp16 = add(x = var_708_cast_fp16, y = var_714_cast_fp16)[name = string("obj_11_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> inputs_11_cast_fp16 = add(x = inputs_9_cast_fp16, y = obj_11_cast_fp16)[name = string("inputs_11_cast_fp16")];
+            tensor<int32, [1]> out_11_axes_0 = const()[name = string("out_11_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_725_to_fp16 = const()[name = string("op_725_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1500]> out_11_cast_fp16 = layer_norm(axes = out_11_axes_0, epsilon = var_725_to_fp16, x = inputs_11_cast_fp16)[name = string("out_11_cast_fp16")];
+            tensor<fp16, [768]> input_19_gamma_0_to_fp16 = const()[name = string("input_19_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(18099200)))];
+            tensor<fp16, [768]> input_19_beta_0_to_fp16 = const()[name = string("input_19_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(18100800)))];
+            fp16 input_19_epsilon_0_to_fp16 = const()[name = string("input_19_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1500]> input_19_cast_fp16 = batch_norm(beta = input_19_beta_0_to_fp16, epsilon = input_19_epsilon_0_to_fp16, gamma = input_19_gamma_0_to_fp16, mean = var_81_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_11_cast_fp16)[name = string("input_19_cast_fp16")];
+            string var_743_pad_type_0 = const()[name = string("op_743_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_743_strides_0 = const()[name = string("op_743_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_743_pad_0 = const()[name = string("op_743_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_743_dilations_0 = const()[name = string("op_743_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_743_groups_0 = const()[name = string("op_743_groups_0"), val = int32(1)];
+            tensor<fp16, [3072, 768, 1, 1]> layers_2_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [3072, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(18102400))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(19282112))))[name = string("layers_2_fc1_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [3072]> layers_2_fc1_inlier_module_bias_to_fp16 = const()[name = string("layers_2_fc1_inlier_module_bias_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(19282240)))];
+            tensor<fp16, [1, 3072, 1, 1500]> var_743_cast_fp16 = conv(bias = layers_2_fc1_inlier_module_bias_to_fp16, dilations = var_743_dilations_0, groups = var_743_groups_0, pad = var_743_pad_0, pad_type = var_743_pad_type_0, strides = var_743_strides_0, weight = layers_2_fc1_inlier_module_weight_to_fp16_palettized, x = input_19_cast_fp16)[name = string("op_743_cast_fp16")];
+            string var_749_pad_type_0 = const()[name = string("op_749_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_749_strides_0 = const()[name = string("op_749_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_749_pad_0 = const()[name = string("op_749_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_749_dilations_0 = const()[name = string("op_749_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_749_groups_0 = const()[name = string("op_749_groups_0"), val = int32(1)];
+            tensor<fp16, [3072, 768, 1, 1]> layers_2_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [3072, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(19352448))), nonzero_data = tensor<fp16, [31950]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(19288448))))[name = string("layers_2_fc1_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 3072, 1, 1500]> var_749_cast_fp16 = conv(dilations = var_749_dilations_0, groups = var_749_groups_0, pad = var_749_pad_0, pad_type = var_749_pad_type_0, strides = var_749_strides_0, weight = layers_2_fc1_outlier_module_weight_to_fp16_sparsified, x = input_19_cast_fp16)[name = string("op_749_cast_fp16")];
+            tensor<fp16, [1, 3072, 1, 1500]> input_21_cast_fp16 = add(x = var_743_cast_fp16, y = var_749_cast_fp16)[name = string("input_21_cast_fp16")];
+            string input_23_mode_0 = const()[name = string("input_23_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 3072, 1, 1500]> input_23_cast_fp16 = gelu(mode = input_23_mode_0, x = input_21_cast_fp16)[name = string("input_23_cast_fp16")];
+            string var_760_pad_type_0 = const()[name = string("op_760_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_760_strides_0 = const()[name = string("op_760_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_760_pad_0 = const()[name = string("op_760_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_760_dilations_0 = const()[name = string("op_760_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_760_groups_0 = const()[name = string("op_760_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 3072, 1, 1]> layers_2_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(19647424))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(20827136))))[name = string("layers_2_fc2_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_2_fc2_inlier_module_bias_to_fp16 = const()[name = string("layers_2_fc2_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(20827264)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_760_cast_fp16 = conv(bias = layers_2_fc2_inlier_module_bias_to_fp16, dilations = var_760_dilations_0, groups = var_760_groups_0, pad = var_760_pad_0, pad_type = var_760_pad_type_0, strides = var_760_strides_0, weight = layers_2_fc2_inlier_module_weight_to_fp16_palettized, x = input_23_cast_fp16)[name = string("op_760_cast_fp16")];
+            string var_766_pad_type_0 = const()[name = string("op_766_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_766_strides_0 = const()[name = string("op_766_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_766_pad_0 = const()[name = string("op_766_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_766_dilations_0 = const()[name = string("op_766_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_766_groups_0 = const()[name = string("op_766_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 3072, 1, 1]> layers_2_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(20892032))), nonzero_data = tensor<fp16, [31546]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(20828864))))[name = string("layers_2_fc2_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1500]> var_766_cast_fp16 = conv(dilations = var_766_dilations_0, groups = var_766_groups_0, pad = var_766_pad_0, pad_type = var_766_pad_type_0, strides = var_766_strides_0, weight = layers_2_fc2_outlier_module_weight_to_fp16_sparsified, x = input_23_cast_fp16)[name = string("op_766_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> hidden_states_9_cast_fp16 = add(x = var_760_cast_fp16, y = var_766_cast_fp16)[name = string("hidden_states_9_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> inputs_13_cast_fp16 = add(x = inputs_11_cast_fp16, y = hidden_states_9_cast_fp16)[name = string("inputs_13_cast_fp16")];
+            int32 var_776 = const()[name = string("op_776"), val = int32(3)];
+            tensor<int32, [1]> out_13_axes_0 = const()[name = string("out_13_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_795_to_fp16 = const()[name = string("op_795_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1500]> out_13_cast_fp16 = layer_norm(axes = out_13_axes_0, epsilon = var_795_to_fp16, x = inputs_13_cast_fp16)[name = string("out_13_cast_fp16")];
+            tensor<fp16, [768]> obj_13_gamma_0_to_fp16 = const()[name = string("obj_13_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(21187008)))];
+            tensor<fp16, [768]> obj_13_beta_0_to_fp16 = const()[name = string("obj_13_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(21188608)))];
+            fp16 obj_13_epsilon_0_to_fp16 = const()[name = string("obj_13_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1500]> obj_13_cast_fp16 = batch_norm(beta = obj_13_beta_0_to_fp16, epsilon = obj_13_epsilon_0_to_fp16, gamma = obj_13_gamma_0_to_fp16, mean = var_81_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_13_cast_fp16)[name = string("obj_13_cast_fp16")];
+            string var_817_pad_type_0 = const()[name = string("op_817_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_817_strides_0 = const()[name = string("op_817_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_817_pad_0 = const()[name = string("op_817_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_817_dilations_0 = const()[name = string("op_817_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_817_groups_0 = const()[name = string("op_817_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_3_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(21190208))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(21485184))))[name = string("layers_3_self_attn_q_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_3_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_3_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(21485312)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_817_cast_fp16 = conv(bias = layers_3_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_817_dilations_0, groups = var_817_groups_0, pad = var_817_pad_0, pad_type = var_817_pad_type_0, strides = var_817_strides_0, weight = layers_3_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_13_cast_fp16)[name = string("op_817_cast_fp16")];
+            string var_823_pad_type_0 = const()[name = string("op_823_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_823_strides_0 = const()[name = string("op_823_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_823_pad_0 = const()[name = string("op_823_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_823_dilations_0 = const()[name = string("op_823_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_823_groups_0 = const()[name = string("op_823_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_3_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(21500352))), nonzero_data = tensor<fp16, [6682]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(21486912))))[name = string("layers_3_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1500]> var_823_cast_fp16 = conv(dilations = var_823_dilations_0, groups = var_823_groups_0, pad = var_823_pad_0, pad_type = var_823_pad_type_0, strides = var_823_strides_0, weight = layers_3_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_13_cast_fp16)[name = string("op_823_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> query_7_cast_fp16 = add(x = var_817_cast_fp16, y = var_823_cast_fp16)[name = string("query_7_cast_fp16")];
+            string var_832_pad_type_0 = const()[name = string("op_832_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_832_strides_0 = const()[name = string("op_832_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_832_pad_0 = const()[name = string("op_832_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_832_dilations_0 = const()[name = string("op_832_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_832_groups_0 = const()[name = string("op_832_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_3_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(21574144))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(21869120))))[name = string("layers_3_self_attn_k_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1, 768, 1, 1500]> var_832_cast_fp16 = conv(dilations = var_832_dilations_0, groups = var_832_groups_0, pad = var_832_pad_0, pad_type = var_832_pad_type_0, strides = var_832_strides_0, weight = layers_3_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_13_cast_fp16)[name = string("op_832_cast_fp16")];
+            string var_838_pad_type_0 = const()[name = string("op_838_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_838_strides_0 = const()[name = string("op_838_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_838_pad_0 = const()[name = string("op_838_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_838_dilations_0 = const()[name = string("op_838_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_838_groups_0 = const()[name = string("op_838_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_3_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(21881920))), nonzero_data = tensor<fp16, [6288]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(21869248))))[name = string("layers_3_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1500]> var_838_cast_fp16 = conv(dilations = var_838_dilations_0, groups = var_838_groups_0, pad = var_838_pad_0, pad_type = var_838_pad_type_0, strides = var_838_strides_0, weight = layers_3_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_13_cast_fp16)[name = string("op_838_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> key_7_cast_fp16 = add(x = var_832_cast_fp16, y = var_838_cast_fp16)[name = string("key_7_cast_fp16")];
+            string var_848_pad_type_0 = const()[name = string("op_848_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_848_strides_0 = const()[name = string("op_848_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_848_pad_0 = const()[name = string("op_848_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_848_dilations_0 = const()[name = string("op_848_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_848_groups_0 = const()[name = string("op_848_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_3_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(21955712))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(22250688))))[name = string("layers_3_self_attn_v_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_3_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_3_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(22250816)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_848_cast_fp16 = conv(bias = layers_3_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_848_dilations_0, groups = var_848_groups_0, pad = var_848_pad_0, pad_type = var_848_pad_type_0, strides = var_848_strides_0, weight = layers_3_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_13_cast_fp16)[name = string("op_848_cast_fp16")];
+            string var_854_pad_type_0 = const()[name = string("op_854_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_854_strides_0 = const()[name = string("op_854_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_854_pad_0 = const()[name = string("op_854_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_854_dilations_0 = const()[name = string("op_854_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_854_groups_0 = const()[name = string("op_854_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_3_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(22262080))), nonzero_data = tensor<fp16, [4773]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(22252416))))[name = string("layers_3_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1500]> var_854_cast_fp16 = conv(dilations = var_854_dilations_0, groups = var_854_groups_0, pad = var_854_pad_0, pad_type = var_854_pad_type_0, strides = var_854_strides_0, weight = layers_3_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_13_cast_fp16)[name = string("op_854_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> value_7_cast_fp16 = add(x = var_848_cast_fp16, y = var_854_cast_fp16)[name = string("value_7_cast_fp16")];
+            tensor<int32, [4]> var_857 = const()[name = string("op_857"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1500]> mh_q_7_cast_fp16 = reshape(shape = var_857, x = query_7_cast_fp16)[name = string("mh_q_7_cast_fp16")];
+            fp16 var_859_to_fp16 = const()[name = string("op_859_to_fp16"), val = fp16(0x1p-3)];
+            tensor<fp16, [1, 12, 64, 1500]> var_860_cast_fp16 = mul(x = mh_q_7_cast_fp16, y = var_859_to_fp16)[name = string("op_860_cast_fp16")];
+            tensor<int32, [4]> var_861 = const()[name = string("op_861"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1500]> var_862_cast_fp16 = reshape(shape = var_861, x = key_7_cast_fp16)[name = string("op_862_cast_fp16")];
+            bool mh_w_7_transpose_x_0 = const()[name = string("mh_w_7_transpose_x_0"), val = bool(true)];
+            bool mh_w_7_transpose_y_0 = const()[name = string("mh_w_7_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 12, 1500, 1500]> mh_w_7_cast_fp16 = matmul(transpose_x = mh_w_7_transpose_x_0, transpose_y = mh_w_7_transpose_y_0, x = var_860_cast_fp16, y = var_862_cast_fp16)[name = string("mh_w_7_cast_fp16")];
+            tensor<fp16, [1, 12, 1500, 1500]> var_865_cast_fp16 = softmax(axis = var_776, x = mh_w_7_cast_fp16)[name = string("op_865_cast_fp16")];
+            tensor<int32, [4]> var_866 = const()[name = string("op_866"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1500]> var_867_cast_fp16 = reshape(shape = var_866, x = value_7_cast_fp16)[name = string("op_867_cast_fp16")];
+            bool attn_7_transpose_x_0 = const()[name = string("attn_7_transpose_x_0"), val = bool(false)];
+            bool attn_7_transpose_y_0 = const()[name = string("attn_7_transpose_y_0"), val = bool(true)];
+            tensor<fp16, [1, 12, 64, 1500]> attn_7_cast_fp16 = matmul(transpose_x = attn_7_transpose_x_0, transpose_y = attn_7_transpose_y_0, x = var_867_cast_fp16, y = var_865_cast_fp16)[name = string("attn_7_cast_fp16")];
+            tensor<int32, [4]> var_870 = const()[name = string("op_870"), val = tensor<int32, [4]>([1, 768, 1, -1])];
+            tensor<fp16, [1, 768, 1, 1500]> input_25_cast_fp16 = reshape(shape = var_870, x = attn_7_cast_fp16)[name = string("input_25_cast_fp16")];
+            string var_880_pad_type_0 = const()[name = string("op_880_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_880_strides_0 = const()[name = string("op_880_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_880_pad_0 = const()[name = string("op_880_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_880_dilations_0 = const()[name = string("op_880_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_880_groups_0 = const()[name = string("op_880_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_3_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(22335872))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(22630848))))[name = string("layers_3_self_attn_o_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_3_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_3_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(22630976)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_880_cast_fp16 = conv(bias = layers_3_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_880_dilations_0, groups = var_880_groups_0, pad = var_880_pad_0, pad_type = var_880_pad_type_0, strides = var_880_strides_0, weight = layers_3_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_25_cast_fp16)[name = string("op_880_cast_fp16")];
+            string var_886_pad_type_0 = const()[name = string("op_886_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_886_strides_0 = const()[name = string("op_886_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_886_pad_0 = const()[name = string("op_886_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_886_dilations_0 = const()[name = string("op_886_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_886_groups_0 = const()[name = string("op_886_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_3_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(22640640))), nonzero_data = tensor<fp16, [3976]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(22632576))))[name = string("layers_3_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1500]> var_886_cast_fp16 = conv(dilations = var_886_dilations_0, groups = var_886_groups_0, pad = var_886_pad_0, pad_type = var_886_pad_type_0, strides = var_886_strides_0, weight = layers_3_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_25_cast_fp16)[name = string("op_886_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> obj_15_cast_fp16 = add(x = var_880_cast_fp16, y = var_886_cast_fp16)[name = string("obj_15_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> inputs_15_cast_fp16 = add(x = inputs_13_cast_fp16, y = obj_15_cast_fp16)[name = string("inputs_15_cast_fp16")];
+            tensor<int32, [1]> out_15_axes_0 = const()[name = string("out_15_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_897_to_fp16 = const()[name = string("op_897_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1500]> out_15_cast_fp16 = layer_norm(axes = out_15_axes_0, epsilon = var_897_to_fp16, x = inputs_15_cast_fp16)[name = string("out_15_cast_fp16")];
+            tensor<fp16, [768]> input_27_gamma_0_to_fp16 = const()[name = string("input_27_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(22714432)))];
+            tensor<fp16, [768]> input_27_beta_0_to_fp16 = const()[name = string("input_27_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(22716032)))];
+            fp16 input_27_epsilon_0_to_fp16 = const()[name = string("input_27_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1500]> input_27_cast_fp16 = batch_norm(beta = input_27_beta_0_to_fp16, epsilon = input_27_epsilon_0_to_fp16, gamma = input_27_gamma_0_to_fp16, mean = var_81_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_15_cast_fp16)[name = string("input_27_cast_fp16")];
+            string var_915_pad_type_0 = const()[name = string("op_915_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_915_strides_0 = const()[name = string("op_915_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_915_pad_0 = const()[name = string("op_915_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_915_dilations_0 = const()[name = string("op_915_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_915_groups_0 = const()[name = string("op_915_groups_0"), val = int32(1)];
+            tensor<fp16, [3072, 768, 1, 1]> layers_3_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [3072, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(22717632))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(23897344))))[name = string("layers_3_fc1_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [3072]> layers_3_fc1_inlier_module_bias_to_fp16 = const()[name = string("layers_3_fc1_inlier_module_bias_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(23897472)))];
+            tensor<fp16, [1, 3072, 1, 1500]> var_915_cast_fp16 = conv(bias = layers_3_fc1_inlier_module_bias_to_fp16, dilations = var_915_dilations_0, groups = var_915_groups_0, pad = var_915_pad_0, pad_type = var_915_pad_type_0, strides = var_915_strides_0, weight = layers_3_fc1_inlier_module_weight_to_fp16_palettized, x = input_27_cast_fp16)[name = string("op_915_cast_fp16")];
+            string var_921_pad_type_0 = const()[name = string("op_921_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_921_strides_0 = const()[name = string("op_921_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_921_pad_0 = const()[name = string("op_921_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_921_dilations_0 = const()[name = string("op_921_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_921_groups_0 = const()[name = string("op_921_groups_0"), val = int32(1)];
+            tensor<fp16, [3072, 768, 1, 1]> layers_3_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [3072, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(23960640))), nonzero_data = tensor<fp16, [28437]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(23903680))))[name = string("layers_3_fc1_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 3072, 1, 1500]> var_921_cast_fp16 = conv(dilations = var_921_dilations_0, groups = var_921_groups_0, pad = var_921_pad_0, pad_type = var_921_pad_type_0, strides = var_921_strides_0, weight = layers_3_fc1_outlier_module_weight_to_fp16_sparsified, x = input_27_cast_fp16)[name = string("op_921_cast_fp16")];
+            tensor<fp16, [1, 3072, 1, 1500]> input_29_cast_fp16 = add(x = var_915_cast_fp16, y = var_921_cast_fp16)[name = string("input_29_cast_fp16")];
+            string input_31_mode_0 = const()[name = string("input_31_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 3072, 1, 1500]> input_31_cast_fp16 = gelu(mode = input_31_mode_0, x = input_29_cast_fp16)[name = string("input_31_cast_fp16")];
+            string var_932_pad_type_0 = const()[name = string("op_932_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_932_strides_0 = const()[name = string("op_932_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_932_pad_0 = const()[name = string("op_932_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_932_dilations_0 = const()[name = string("op_932_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_932_groups_0 = const()[name = string("op_932_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 3072, 1, 1]> layers_3_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(24255616))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(25435328))))[name = string("layers_3_fc2_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_3_fc2_inlier_module_bias_to_fp16 = const()[name = string("layers_3_fc2_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(25435456)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_932_cast_fp16 = conv(bias = layers_3_fc2_inlier_module_bias_to_fp16, dilations = var_932_dilations_0, groups = var_932_groups_0, pad = var_932_pad_0, pad_type = var_932_pad_type_0, strides = var_932_strides_0, weight = layers_3_fc2_inlier_module_weight_to_fp16_palettized, x = input_31_cast_fp16)[name = string("op_932_cast_fp16")];
+            string var_938_pad_type_0 = const()[name = string("op_938_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_938_strides_0 = const()[name = string("op_938_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_938_pad_0 = const()[name = string("op_938_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_938_dilations_0 = const()[name = string("op_938_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_938_groups_0 = const()[name = string("op_938_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 3072, 1, 1]> layers_3_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(25492672))), nonzero_data = tensor<fp16, [27768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(25437056))))[name = string("layers_3_fc2_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1500]> var_938_cast_fp16 = conv(dilations = var_938_dilations_0, groups = var_938_groups_0, pad = var_938_pad_0, pad_type = var_938_pad_type_0, strides = var_938_strides_0, weight = layers_3_fc2_outlier_module_weight_to_fp16_sparsified, x = input_31_cast_fp16)[name = string("op_938_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> hidden_states_11_cast_fp16 = add(x = var_932_cast_fp16, y = var_938_cast_fp16)[name = string("hidden_states_11_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> inputs_17_cast_fp16 = add(x = inputs_15_cast_fp16, y = hidden_states_11_cast_fp16)[name = string("inputs_17_cast_fp16")];
+            int32 var_948 = const()[name = string("op_948"), val = int32(3)];
+            tensor<int32, [1]> out_17_axes_0 = const()[name = string("out_17_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_967_to_fp16 = const()[name = string("op_967_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1500]> out_17_cast_fp16 = layer_norm(axes = out_17_axes_0, epsilon = var_967_to_fp16, x = inputs_17_cast_fp16)[name = string("out_17_cast_fp16")];
+            tensor<fp16, [768]> obj_17_gamma_0_to_fp16 = const()[name = string("obj_17_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(25787648)))];
+            tensor<fp16, [768]> obj_17_beta_0_to_fp16 = const()[name = string("obj_17_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(25789248)))];
+            fp16 obj_17_epsilon_0_to_fp16 = const()[name = string("obj_17_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1500]> obj_17_cast_fp16 = batch_norm(beta = obj_17_beta_0_to_fp16, epsilon = obj_17_epsilon_0_to_fp16, gamma = obj_17_gamma_0_to_fp16, mean = var_81_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_17_cast_fp16)[name = string("obj_17_cast_fp16")];
+            string var_989_pad_type_0 = const()[name = string("op_989_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_989_strides_0 = const()[name = string("op_989_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_989_pad_0 = const()[name = string("op_989_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_989_dilations_0 = const()[name = string("op_989_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_989_groups_0 = const()[name = string("op_989_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_4_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(25790848))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(26085824))))[name = string("layers_4_self_attn_q_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_4_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_4_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(26085952)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_989_cast_fp16 = conv(bias = layers_4_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_989_dilations_0, groups = var_989_groups_0, pad = var_989_pad_0, pad_type = var_989_pad_type_0, strides = var_989_strides_0, weight = layers_4_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_17_cast_fp16)[name = string("op_989_cast_fp16")];
+            string var_995_pad_type_0 = const()[name = string("op_995_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_995_strides_0 = const()[name = string("op_995_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_995_pad_0 = const()[name = string("op_995_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_995_dilations_0 = const()[name = string("op_995_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_995_groups_0 = const()[name = string("op_995_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_4_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(26100672))), nonzero_data = tensor<fp16, [6507]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(26087552))))[name = string("layers_4_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1500]> var_995_cast_fp16 = conv(dilations = var_995_dilations_0, groups = var_995_groups_0, pad = var_995_pad_0, pad_type = var_995_pad_type_0, strides = var_995_strides_0, weight = layers_4_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_17_cast_fp16)[name = string("op_995_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> query_9_cast_fp16 = add(x = var_989_cast_fp16, y = var_995_cast_fp16)[name = string("query_9_cast_fp16")];
+            string var_1004_pad_type_0 = const()[name = string("op_1004_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1004_strides_0 = const()[name = string("op_1004_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1004_pad_0 = const()[name = string("op_1004_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1004_dilations_0 = const()[name = string("op_1004_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1004_groups_0 = const()[name = string("op_1004_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_4_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(26174464))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(26469440))))[name = string("layers_4_self_attn_k_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1, 768, 1, 1500]> var_1004_cast_fp16 = conv(dilations = var_1004_dilations_0, groups = var_1004_groups_0, pad = var_1004_pad_0, pad_type = var_1004_pad_type_0, strides = var_1004_strides_0, weight = layers_4_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_17_cast_fp16)[name = string("op_1004_cast_fp16")];
+            string var_1010_pad_type_0 = const()[name = string("op_1010_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1010_strides_0 = const()[name = string("op_1010_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1010_pad_0 = const()[name = string("op_1010_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1010_dilations_0 = const()[name = string("op_1010_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1010_groups_0 = const()[name = string("op_1010_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_4_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(26482240))), nonzero_data = tensor<fp16, [6290]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(26469568))))[name = string("layers_4_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1500]> var_1010_cast_fp16 = conv(dilations = var_1010_dilations_0, groups = var_1010_groups_0, pad = var_1010_pad_0, pad_type = var_1010_pad_type_0, strides = var_1010_strides_0, weight = layers_4_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_17_cast_fp16)[name = string("op_1010_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> key_9_cast_fp16 = add(x = var_1004_cast_fp16, y = var_1010_cast_fp16)[name = string("key_9_cast_fp16")];
+            string var_1020_pad_type_0 = const()[name = string("op_1020_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1020_strides_0 = const()[name = string("op_1020_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1020_pad_0 = const()[name = string("op_1020_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1020_dilations_0 = const()[name = string("op_1020_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1020_groups_0 = const()[name = string("op_1020_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_4_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(26556032))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(26851008))))[name = string("layers_4_self_attn_v_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_4_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_4_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(26851136)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_1020_cast_fp16 = conv(bias = layers_4_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_1020_dilations_0, groups = var_1020_groups_0, pad = var_1020_pad_0, pad_type = var_1020_pad_type_0, strides = var_1020_strides_0, weight = layers_4_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_17_cast_fp16)[name = string("op_1020_cast_fp16")];
+            string var_1026_pad_type_0 = const()[name = string("op_1026_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1026_strides_0 = const()[name = string("op_1026_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1026_pad_0 = const()[name = string("op_1026_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1026_dilations_0 = const()[name = string("op_1026_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1026_groups_0 = const()[name = string("op_1026_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_4_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(26862144))), nonzero_data = tensor<fp16, [4665]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(26852736))))[name = string("layers_4_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1500]> var_1026_cast_fp16 = conv(dilations = var_1026_dilations_0, groups = var_1026_groups_0, pad = var_1026_pad_0, pad_type = var_1026_pad_type_0, strides = var_1026_strides_0, weight = layers_4_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_17_cast_fp16)[name = string("op_1026_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> value_9_cast_fp16 = add(x = var_1020_cast_fp16, y = var_1026_cast_fp16)[name = string("value_9_cast_fp16")];
+            tensor<int32, [4]> var_1029 = const()[name = string("op_1029"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1500]> mh_q_9_cast_fp16 = reshape(shape = var_1029, x = query_9_cast_fp16)[name = string("mh_q_9_cast_fp16")];
+            fp16 var_1031_to_fp16 = const()[name = string("op_1031_to_fp16"), val = fp16(0x1p-3)];
+            tensor<fp16, [1, 12, 64, 1500]> var_1032_cast_fp16 = mul(x = mh_q_9_cast_fp16, y = var_1031_to_fp16)[name = string("op_1032_cast_fp16")];
+            tensor<int32, [4]> var_1033 = const()[name = string("op_1033"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1500]> var_1034_cast_fp16 = reshape(shape = var_1033, x = key_9_cast_fp16)[name = string("op_1034_cast_fp16")];
+            bool mh_w_9_transpose_x_0 = const()[name = string("mh_w_9_transpose_x_0"), val = bool(true)];
+            bool mh_w_9_transpose_y_0 = const()[name = string("mh_w_9_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 12, 1500, 1500]> mh_w_9_cast_fp16 = matmul(transpose_x = mh_w_9_transpose_x_0, transpose_y = mh_w_9_transpose_y_0, x = var_1032_cast_fp16, y = var_1034_cast_fp16)[name = string("mh_w_9_cast_fp16")];
+            tensor<fp16, [1, 12, 1500, 1500]> var_1037_cast_fp16 = softmax(axis = var_948, x = mh_w_9_cast_fp16)[name = string("op_1037_cast_fp16")];
+            tensor<int32, [4]> var_1038 = const()[name = string("op_1038"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1500]> var_1039_cast_fp16 = reshape(shape = var_1038, x = value_9_cast_fp16)[name = string("op_1039_cast_fp16")];
+            bool attn_9_transpose_x_0 = const()[name = string("attn_9_transpose_x_0"), val = bool(false)];
+            bool attn_9_transpose_y_0 = const()[name = string("attn_9_transpose_y_0"), val = bool(true)];
+            tensor<fp16, [1, 12, 64, 1500]> attn_9_cast_fp16 = matmul(transpose_x = attn_9_transpose_x_0, transpose_y = attn_9_transpose_y_0, x = var_1039_cast_fp16, y = var_1037_cast_fp16)[name = string("attn_9_cast_fp16")];
+            tensor<int32, [4]> var_1042 = const()[name = string("op_1042"), val = tensor<int32, [4]>([1, 768, 1, -1])];
+            tensor<fp16, [1, 768, 1, 1500]> input_33_cast_fp16 = reshape(shape = var_1042, x = attn_9_cast_fp16)[name = string("input_33_cast_fp16")];
+            string var_1052_pad_type_0 = const()[name = string("op_1052_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1052_strides_0 = const()[name = string("op_1052_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1052_pad_0 = const()[name = string("op_1052_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1052_dilations_0 = const()[name = string("op_1052_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1052_groups_0 = const()[name = string("op_1052_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_4_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(26935936))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(27230912))))[name = string("layers_4_self_attn_o_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_4_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_4_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(27231040)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_1052_cast_fp16 = conv(bias = layers_4_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_1052_dilations_0, groups = var_1052_groups_0, pad = var_1052_pad_0, pad_type = var_1052_pad_type_0, strides = var_1052_strides_0, weight = layers_4_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_33_cast_fp16)[name = string("op_1052_cast_fp16")];
+            string var_1058_pad_type_0 = const()[name = string("op_1058_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1058_strides_0 = const()[name = string("op_1058_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1058_pad_0 = const()[name = string("op_1058_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1058_dilations_0 = const()[name = string("op_1058_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1058_groups_0 = const()[name = string("op_1058_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_4_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(27241344))), nonzero_data = tensor<fp16, [4306]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(27232640))))[name = string("layers_4_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1500]> var_1058_cast_fp16 = conv(dilations = var_1058_dilations_0, groups = var_1058_groups_0, pad = var_1058_pad_0, pad_type = var_1058_pad_type_0, strides = var_1058_strides_0, weight = layers_4_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_33_cast_fp16)[name = string("op_1058_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> obj_19_cast_fp16 = add(x = var_1052_cast_fp16, y = var_1058_cast_fp16)[name = string("obj_19_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> inputs_19_cast_fp16 = add(x = inputs_17_cast_fp16, y = obj_19_cast_fp16)[name = string("inputs_19_cast_fp16")];
+            tensor<int32, [1]> out_19_axes_0 = const()[name = string("out_19_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_1069_to_fp16 = const()[name = string("op_1069_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1500]> out_19_cast_fp16 = layer_norm(axes = out_19_axes_0, epsilon = var_1069_to_fp16, x = inputs_19_cast_fp16)[name = string("out_19_cast_fp16")];
+            tensor<fp16, [768]> input_35_gamma_0_to_fp16 = const()[name = string("input_35_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(27315136)))];
+            tensor<fp16, [768]> input_35_beta_0_to_fp16 = const()[name = string("input_35_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(27316736)))];
+            fp16 input_35_epsilon_0_to_fp16 = const()[name = string("input_35_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1500]> input_35_cast_fp16 = batch_norm(beta = input_35_beta_0_to_fp16, epsilon = input_35_epsilon_0_to_fp16, gamma = input_35_gamma_0_to_fp16, mean = var_81_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_19_cast_fp16)[name = string("input_35_cast_fp16")];
+            string var_1087_pad_type_0 = const()[name = string("op_1087_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1087_strides_0 = const()[name = string("op_1087_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1087_pad_0 = const()[name = string("op_1087_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1087_dilations_0 = const()[name = string("op_1087_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1087_groups_0 = const()[name = string("op_1087_groups_0"), val = int32(1)];
+            tensor<fp16, [3072, 768, 1, 1]> layers_4_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [3072, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(27318336))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(28498048))))[name = string("layers_4_fc1_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [3072]> layers_4_fc1_inlier_module_bias_to_fp16 = const()[name = string("layers_4_fc1_inlier_module_bias_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(28498176)))];
+            tensor<fp16, [1, 3072, 1, 1500]> var_1087_cast_fp16 = conv(bias = layers_4_fc1_inlier_module_bias_to_fp16, dilations = var_1087_dilations_0, groups = var_1087_groups_0, pad = var_1087_pad_0, pad_type = var_1087_pad_type_0, strides = var_1087_strides_0, weight = layers_4_fc1_inlier_module_weight_to_fp16_palettized, x = input_35_cast_fp16)[name = string("op_1087_cast_fp16")];
+            string var_1093_pad_type_0 = const()[name = string("op_1093_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1093_strides_0 = const()[name = string("op_1093_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1093_pad_0 = const()[name = string("op_1093_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1093_dilations_0 = const()[name = string("op_1093_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1093_groups_0 = const()[name = string("op_1093_groups_0"), val = int32(1)];
+            tensor<fp16, [3072, 768, 1, 1]> layers_4_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [3072, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(28545344))), nonzero_data = tensor<fp16, [20439]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(28504384))))[name = string("layers_4_fc1_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 3072, 1, 1500]> var_1093_cast_fp16 = conv(dilations = var_1093_dilations_0, groups = var_1093_groups_0, pad = var_1093_pad_0, pad_type = var_1093_pad_type_0, strides = var_1093_strides_0, weight = layers_4_fc1_outlier_module_weight_to_fp16_sparsified, x = input_35_cast_fp16)[name = string("op_1093_cast_fp16")];
+            tensor<fp16, [1, 3072, 1, 1500]> input_37_cast_fp16 = add(x = var_1087_cast_fp16, y = var_1093_cast_fp16)[name = string("input_37_cast_fp16")];
+            string input_39_mode_0 = const()[name = string("input_39_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 3072, 1, 1500]> input_39_cast_fp16 = gelu(mode = input_39_mode_0, x = input_37_cast_fp16)[name = string("input_39_cast_fp16")];
+            string var_1104_pad_type_0 = const()[name = string("op_1104_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1104_strides_0 = const()[name = string("op_1104_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1104_pad_0 = const()[name = string("op_1104_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1104_dilations_0 = const()[name = string("op_1104_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1104_groups_0 = const()[name = string("op_1104_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 3072, 1, 1]> layers_4_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(28840320))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(30020032))))[name = string("layers_4_fc2_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_4_fc2_inlier_module_bias_to_fp16 = const()[name = string("layers_4_fc2_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(30020160)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_1104_cast_fp16 = conv(bias = layers_4_fc2_inlier_module_bias_to_fp16, dilations = var_1104_dilations_0, groups = var_1104_groups_0, pad = var_1104_pad_0, pad_type = var_1104_pad_type_0, strides = var_1104_strides_0, weight = layers_4_fc2_inlier_module_weight_to_fp16_palettized, x = input_39_cast_fp16)[name = string("op_1104_cast_fp16")];
+            string var_1110_pad_type_0 = const()[name = string("op_1110_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1110_strides_0 = const()[name = string("op_1110_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1110_pad_0 = const()[name = string("op_1110_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1110_dilations_0 = const()[name = string("op_1110_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1110_groups_0 = const()[name = string("op_1110_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 3072, 1, 1]> layers_4_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(30065024))), nonzero_data = tensor<fp16, [21573]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(30021760))))[name = string("layers_4_fc2_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1500]> var_1110_cast_fp16 = conv(dilations = var_1110_dilations_0, groups = var_1110_groups_0, pad = var_1110_pad_0, pad_type = var_1110_pad_type_0, strides = var_1110_strides_0, weight = layers_4_fc2_outlier_module_weight_to_fp16_sparsified, x = input_39_cast_fp16)[name = string("op_1110_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> hidden_states_13_cast_fp16 = add(x = var_1104_cast_fp16, y = var_1110_cast_fp16)[name = string("hidden_states_13_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> inputs_21_cast_fp16 = add(x = inputs_19_cast_fp16, y = hidden_states_13_cast_fp16)[name = string("inputs_21_cast_fp16")];
+            int32 var_1120 = const()[name = string("op_1120"), val = int32(3)];
+            tensor<int32, [1]> out_21_axes_0 = const()[name = string("out_21_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_1139_to_fp16 = const()[name = string("op_1139_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1500]> out_21_cast_fp16 = layer_norm(axes = out_21_axes_0, epsilon = var_1139_to_fp16, x = inputs_21_cast_fp16)[name = string("out_21_cast_fp16")];
+            tensor<fp16, [768]> obj_21_gamma_0_to_fp16 = const()[name = string("obj_21_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(30360000)))];
+            tensor<fp16, [768]> obj_21_beta_0_to_fp16 = const()[name = string("obj_21_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(30361600)))];
+            fp16 obj_21_epsilon_0_to_fp16 = const()[name = string("obj_21_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1500]> obj_21_cast_fp16 = batch_norm(beta = obj_21_beta_0_to_fp16, epsilon = obj_21_epsilon_0_to_fp16, gamma = obj_21_gamma_0_to_fp16, mean = var_81_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_21_cast_fp16)[name = string("obj_21_cast_fp16")];
+            string var_1161_pad_type_0 = const()[name = string("op_1161_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1161_strides_0 = const()[name = string("op_1161_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1161_pad_0 = const()[name = string("op_1161_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1161_dilations_0 = const()[name = string("op_1161_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1161_groups_0 = const()[name = string("op_1161_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_5_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(30363200))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(30658176))))[name = string("layers_5_self_attn_q_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_5_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_5_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(30658304)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_1161_cast_fp16 = conv(bias = layers_5_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_1161_dilations_0, groups = var_1161_groups_0, pad = var_1161_pad_0, pad_type = var_1161_pad_type_0, strides = var_1161_strides_0, weight = layers_5_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_21_cast_fp16)[name = string("op_1161_cast_fp16")];
+            string var_1167_pad_type_0 = const()[name = string("op_1167_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1167_strides_0 = const()[name = string("op_1167_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1167_pad_0 = const()[name = string("op_1167_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1167_dilations_0 = const()[name = string("op_1167_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1167_groups_0 = const()[name = string("op_1167_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_5_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(30670336))), nonzero_data = tensor<fp16, [5173]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(30659904))))[name = string("layers_5_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1500]> var_1167_cast_fp16 = conv(dilations = var_1167_dilations_0, groups = var_1167_groups_0, pad = var_1167_pad_0, pad_type = var_1167_pad_type_0, strides = var_1167_strides_0, weight = layers_5_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_21_cast_fp16)[name = string("op_1167_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> query_11_cast_fp16 = add(x = var_1161_cast_fp16, y = var_1167_cast_fp16)[name = string("query_11_cast_fp16")];
+            string var_1176_pad_type_0 = const()[name = string("op_1176_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1176_strides_0 = const()[name = string("op_1176_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1176_pad_0 = const()[name = string("op_1176_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1176_dilations_0 = const()[name = string("op_1176_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1176_groups_0 = const()[name = string("op_1176_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_5_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(30744128))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(31039104))))[name = string("layers_5_self_attn_k_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1, 768, 1, 1500]> var_1176_cast_fp16 = conv(dilations = var_1176_dilations_0, groups = var_1176_groups_0, pad = var_1176_pad_0, pad_type = var_1176_pad_type_0, strides = var_1176_strides_0, weight = layers_5_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_21_cast_fp16)[name = string("op_1176_cast_fp16")];
+            string var_1182_pad_type_0 = const()[name = string("op_1182_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1182_strides_0 = const()[name = string("op_1182_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1182_pad_0 = const()[name = string("op_1182_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1182_dilations_0 = const()[name = string("op_1182_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1182_groups_0 = const()[name = string("op_1182_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_5_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(31048960))), nonzero_data = tensor<fp16, [4811]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(31039232))))[name = string("layers_5_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1500]> var_1182_cast_fp16 = conv(dilations = var_1182_dilations_0, groups = var_1182_groups_0, pad = var_1182_pad_0, pad_type = var_1182_pad_type_0, strides = var_1182_strides_0, weight = layers_5_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_21_cast_fp16)[name = string("op_1182_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> key_11_cast_fp16 = add(x = var_1176_cast_fp16, y = var_1182_cast_fp16)[name = string("key_11_cast_fp16")];
+            string var_1192_pad_type_0 = const()[name = string("op_1192_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1192_strides_0 = const()[name = string("op_1192_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1192_pad_0 = const()[name = string("op_1192_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1192_dilations_0 = const()[name = string("op_1192_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1192_groups_0 = const()[name = string("op_1192_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_5_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(31122752))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(31417728))))[name = string("layers_5_self_attn_v_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_5_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_5_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(31417856)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_1192_cast_fp16 = conv(bias = layers_5_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_1192_dilations_0, groups = var_1192_groups_0, pad = var_1192_pad_0, pad_type = var_1192_pad_type_0, strides = var_1192_strides_0, weight = layers_5_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_21_cast_fp16)[name = string("op_1192_cast_fp16")];
+            string var_1198_pad_type_0 = const()[name = string("op_1198_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1198_strides_0 = const()[name = string("op_1198_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1198_pad_0 = const()[name = string("op_1198_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1198_dilations_0 = const()[name = string("op_1198_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1198_groups_0 = const()[name = string("op_1198_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_5_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(31426368))), nonzero_data = tensor<fp16, [3420]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(31419456))))[name = string("layers_5_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1500]> var_1198_cast_fp16 = conv(dilations = var_1198_dilations_0, groups = var_1198_groups_0, pad = var_1198_pad_0, pad_type = var_1198_pad_type_0, strides = var_1198_strides_0, weight = layers_5_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_21_cast_fp16)[name = string("op_1198_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> value_11_cast_fp16 = add(x = var_1192_cast_fp16, y = var_1198_cast_fp16)[name = string("value_11_cast_fp16")];
+            tensor<int32, [4]> var_1201 = const()[name = string("op_1201"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1500]> mh_q_11_cast_fp16 = reshape(shape = var_1201, x = query_11_cast_fp16)[name = string("mh_q_11_cast_fp16")];
+            fp16 var_1203_to_fp16 = const()[name = string("op_1203_to_fp16"), val = fp16(0x1p-3)];
+            tensor<fp16, [1, 12, 64, 1500]> var_1204_cast_fp16 = mul(x = mh_q_11_cast_fp16, y = var_1203_to_fp16)[name = string("op_1204_cast_fp16")];
+            tensor<int32, [4]> var_1205 = const()[name = string("op_1205"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1500]> var_1206_cast_fp16 = reshape(shape = var_1205, x = key_11_cast_fp16)[name = string("op_1206_cast_fp16")];
+            bool mh_w_11_transpose_x_0 = const()[name = string("mh_w_11_transpose_x_0"), val = bool(true)];
+            bool mh_w_11_transpose_y_0 = const()[name = string("mh_w_11_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 12, 1500, 1500]> mh_w_11_cast_fp16 = matmul(transpose_x = mh_w_11_transpose_x_0, transpose_y = mh_w_11_transpose_y_0, x = var_1204_cast_fp16, y = var_1206_cast_fp16)[name = string("mh_w_11_cast_fp16")];
+            tensor<fp16, [1, 12, 1500, 1500]> var_1209_cast_fp16 = softmax(axis = var_1120, x = mh_w_11_cast_fp16)[name = string("op_1209_cast_fp16")];
+            tensor<int32, [4]> var_1210 = const()[name = string("op_1210"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1500]> var_1211_cast_fp16 = reshape(shape = var_1210, x = value_11_cast_fp16)[name = string("op_1211_cast_fp16")];
+            bool attn_11_transpose_x_0 = const()[name = string("attn_11_transpose_x_0"), val = bool(false)];
+            bool attn_11_transpose_y_0 = const()[name = string("attn_11_transpose_y_0"), val = bool(true)];
+            tensor<fp16, [1, 12, 64, 1500]> attn_11_cast_fp16 = matmul(transpose_x = attn_11_transpose_x_0, transpose_y = attn_11_transpose_y_0, x = var_1211_cast_fp16, y = var_1209_cast_fp16)[name = string("attn_11_cast_fp16")];
+            tensor<int32, [4]> var_1214 = const()[name = string("op_1214"), val = tensor<int32, [4]>([1, 768, 1, -1])];
+            tensor<fp16, [1, 768, 1, 1500]> input_41_cast_fp16 = reshape(shape = var_1214, x = attn_11_cast_fp16)[name = string("input_41_cast_fp16")];
+            string var_1224_pad_type_0 = const()[name = string("op_1224_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1224_strides_0 = const()[name = string("op_1224_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1224_pad_0 = const()[name = string("op_1224_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1224_dilations_0 = const()[name = string("op_1224_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1224_groups_0 = const()[name = string("op_1224_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_5_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(31500160))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(31795136))))[name = string("layers_5_self_attn_o_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_5_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_5_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(31795264)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_1224_cast_fp16 = conv(bias = layers_5_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_1224_dilations_0, groups = var_1224_groups_0, pad = var_1224_pad_0, pad_type = var_1224_pad_type_0, strides = var_1224_strides_0, weight = layers_5_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_41_cast_fp16)[name = string("op_1224_cast_fp16")];
+            string var_1230_pad_type_0 = const()[name = string("op_1230_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1230_strides_0 = const()[name = string("op_1230_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1230_pad_0 = const()[name = string("op_1230_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1230_dilations_0 = const()[name = string("op_1230_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1230_groups_0 = const()[name = string("op_1230_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_5_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(31804736))), nonzero_data = tensor<fp16, [3878]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(31796864))))[name = string("layers_5_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1500]> var_1230_cast_fp16 = conv(dilations = var_1230_dilations_0, groups = var_1230_groups_0, pad = var_1230_pad_0, pad_type = var_1230_pad_type_0, strides = var_1230_strides_0, weight = layers_5_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_41_cast_fp16)[name = string("op_1230_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> obj_23_cast_fp16 = add(x = var_1224_cast_fp16, y = var_1230_cast_fp16)[name = string("obj_23_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> inputs_23_cast_fp16 = add(x = inputs_21_cast_fp16, y = obj_23_cast_fp16)[name = string("inputs_23_cast_fp16")];
+            tensor<int32, [1]> out_23_axes_0 = const()[name = string("out_23_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_1241_to_fp16 = const()[name = string("op_1241_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1500]> out_23_cast_fp16 = layer_norm(axes = out_23_axes_0, epsilon = var_1241_to_fp16, x = inputs_23_cast_fp16)[name = string("out_23_cast_fp16")];
+            tensor<fp16, [768]> input_43_gamma_0_to_fp16 = const()[name = string("input_43_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(31878528)))];
+            tensor<fp16, [768]> input_43_beta_0_to_fp16 = const()[name = string("input_43_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(31880128)))];
+            fp16 input_43_epsilon_0_to_fp16 = const()[name = string("input_43_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1500]> input_43_cast_fp16 = batch_norm(beta = input_43_beta_0_to_fp16, epsilon = input_43_epsilon_0_to_fp16, gamma = input_43_gamma_0_to_fp16, mean = var_81_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_23_cast_fp16)[name = string("input_43_cast_fp16")];
+            string var_1259_pad_type_0 = const()[name = string("op_1259_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1259_strides_0 = const()[name = string("op_1259_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1259_pad_0 = const()[name = string("op_1259_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1259_dilations_0 = const()[name = string("op_1259_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1259_groups_0 = const()[name = string("op_1259_groups_0"), val = int32(1)];
+            tensor<fp16, [3072, 768, 1, 1]> layers_5_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [3072, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(31881728))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(33061440))))[name = string("layers_5_fc1_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [3072]> layers_5_fc1_inlier_module_bias_to_fp16 = const()[name = string("layers_5_fc1_inlier_module_bias_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(33061568)))];
+            tensor<fp16, [1, 3072, 1, 1500]> var_1259_cast_fp16 = conv(bias = layers_5_fc1_inlier_module_bias_to_fp16, dilations = var_1259_dilations_0, groups = var_1259_groups_0, pad = var_1259_pad_0, pad_type = var_1259_pad_type_0, strides = var_1259_strides_0, weight = layers_5_fc1_inlier_module_weight_to_fp16_palettized, x = input_43_cast_fp16)[name = string("op_1259_cast_fp16")];
+            string var_1265_pad_type_0 = const()[name = string("op_1265_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1265_strides_0 = const()[name = string("op_1265_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1265_pad_0 = const()[name = string("op_1265_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1265_dilations_0 = const()[name = string("op_1265_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1265_groups_0 = const()[name = string("op_1265_groups_0"), val = int32(1)];
+            tensor<fp16, [3072, 768, 1, 1]> layers_5_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [3072, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(33101248))), nonzero_data = tensor<fp16, [16704]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(33067776))))[name = string("layers_5_fc1_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 3072, 1, 1500]> var_1265_cast_fp16 = conv(dilations = var_1265_dilations_0, groups = var_1265_groups_0, pad = var_1265_pad_0, pad_type = var_1265_pad_type_0, strides = var_1265_strides_0, weight = layers_5_fc1_outlier_module_weight_to_fp16_sparsified, x = input_43_cast_fp16)[name = string("op_1265_cast_fp16")];
+            tensor<fp16, [1, 3072, 1, 1500]> input_45_cast_fp16 = add(x = var_1259_cast_fp16, y = var_1265_cast_fp16)[name = string("input_45_cast_fp16")];
+            string input_47_mode_0 = const()[name = string("input_47_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 3072, 1, 1500]> input_47_cast_fp16 = gelu(mode = input_47_mode_0, x = input_45_cast_fp16)[name = string("input_47_cast_fp16")];
+            string var_1276_pad_type_0 = const()[name = string("op_1276_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1276_strides_0 = const()[name = string("op_1276_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1276_pad_0 = const()[name = string("op_1276_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1276_dilations_0 = const()[name = string("op_1276_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1276_groups_0 = const()[name = string("op_1276_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 3072, 1, 1]> layers_5_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(33396224))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(34575936))))[name = string("layers_5_fc2_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_5_fc2_inlier_module_bias_to_fp16 = const()[name = string("layers_5_fc2_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(34576064)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_1276_cast_fp16 = conv(bias = layers_5_fc2_inlier_module_bias_to_fp16, dilations = var_1276_dilations_0, groups = var_1276_groups_0, pad = var_1276_pad_0, pad_type = var_1276_pad_type_0, strides = var_1276_strides_0, weight = layers_5_fc2_inlier_module_weight_to_fp16_palettized, x = input_47_cast_fp16)[name = string("op_1276_cast_fp16")];
+            string var_1282_pad_type_0 = const()[name = string("op_1282_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1282_strides_0 = const()[name = string("op_1282_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1282_pad_0 = const()[name = string("op_1282_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1282_dilations_0 = const()[name = string("op_1282_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1282_groups_0 = const()[name = string("op_1282_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 3072, 1, 1]> layers_5_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(34614208))), nonzero_data = tensor<fp16, [18230]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(34577664))))[name = string("layers_5_fc2_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1500]> var_1282_cast_fp16 = conv(dilations = var_1282_dilations_0, groups = var_1282_groups_0, pad = var_1282_pad_0, pad_type = var_1282_pad_type_0, strides = var_1282_strides_0, weight = layers_5_fc2_outlier_module_weight_to_fp16_sparsified, x = input_47_cast_fp16)[name = string("op_1282_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> hidden_states_15_cast_fp16 = add(x = var_1276_cast_fp16, y = var_1282_cast_fp16)[name = string("hidden_states_15_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> inputs_25_cast_fp16 = add(x = inputs_23_cast_fp16, y = hidden_states_15_cast_fp16)[name = string("inputs_25_cast_fp16")];
+            int32 var_1292 = const()[name = string("op_1292"), val = int32(3)];
+            tensor<int32, [1]> out_25_axes_0 = const()[name = string("out_25_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_1311_to_fp16 = const()[name = string("op_1311_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1500]> out_25_cast_fp16 = layer_norm(axes = out_25_axes_0, epsilon = var_1311_to_fp16, x = inputs_25_cast_fp16)[name = string("out_25_cast_fp16")];
+            tensor<fp16, [768]> obj_25_gamma_0_to_fp16 = const()[name = string("obj_25_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(34909184)))];
+            tensor<fp16, [768]> obj_25_beta_0_to_fp16 = const()[name = string("obj_25_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(34910784)))];
+            fp16 obj_25_epsilon_0_to_fp16 = const()[name = string("obj_25_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1500]> obj_25_cast_fp16 = batch_norm(beta = obj_25_beta_0_to_fp16, epsilon = obj_25_epsilon_0_to_fp16, gamma = obj_25_gamma_0_to_fp16, mean = var_81_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_25_cast_fp16)[name = string("obj_25_cast_fp16")];
+            string var_1333_pad_type_0 = const()[name = string("op_1333_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1333_strides_0 = const()[name = string("op_1333_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1333_pad_0 = const()[name = string("op_1333_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1333_dilations_0 = const()[name = string("op_1333_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1333_groups_0 = const()[name = string("op_1333_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_6_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(34912384))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(35207360))))[name = string("layers_6_self_attn_q_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_6_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_6_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(35207488)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_1333_cast_fp16 = conv(bias = layers_6_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_1333_dilations_0, groups = var_1333_groups_0, pad = var_1333_pad_0, pad_type = var_1333_pad_type_0, strides = var_1333_strides_0, weight = layers_6_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_25_cast_fp16)[name = string("op_1333_cast_fp16")];
+            string var_1339_pad_type_0 = const()[name = string("op_1339_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1339_strides_0 = const()[name = string("op_1339_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1339_pad_0 = const()[name = string("op_1339_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1339_dilations_0 = const()[name = string("op_1339_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1339_groups_0 = const()[name = string("op_1339_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_6_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(35217280))), nonzero_data = tensor<fp16, [4054]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(35209088))))[name = string("layers_6_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1500]> var_1339_cast_fp16 = conv(dilations = var_1339_dilations_0, groups = var_1339_groups_0, pad = var_1339_pad_0, pad_type = var_1339_pad_type_0, strides = var_1339_strides_0, weight = layers_6_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_25_cast_fp16)[name = string("op_1339_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> query_13_cast_fp16 = add(x = var_1333_cast_fp16, y = var_1339_cast_fp16)[name = string("query_13_cast_fp16")];
+            string var_1348_pad_type_0 = const()[name = string("op_1348_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1348_strides_0 = const()[name = string("op_1348_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1348_pad_0 = const()[name = string("op_1348_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1348_dilations_0 = const()[name = string("op_1348_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1348_groups_0 = const()[name = string("op_1348_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_6_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(35291072))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(35586048))))[name = string("layers_6_self_attn_k_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1, 768, 1, 1500]> var_1348_cast_fp16 = conv(dilations = var_1348_dilations_0, groups = var_1348_groups_0, pad = var_1348_pad_0, pad_type = var_1348_pad_type_0, strides = var_1348_strides_0, weight = layers_6_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_25_cast_fp16)[name = string("op_1348_cast_fp16")];
+            string var_1354_pad_type_0 = const()[name = string("op_1354_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1354_strides_0 = const()[name = string("op_1354_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1354_pad_0 = const()[name = string("op_1354_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1354_dilations_0 = const()[name = string("op_1354_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1354_groups_0 = const()[name = string("op_1354_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_6_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(35593984))), nonzero_data = tensor<fp16, [3869]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(35586176))))[name = string("layers_6_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1500]> var_1354_cast_fp16 = conv(dilations = var_1354_dilations_0, groups = var_1354_groups_0, pad = var_1354_pad_0, pad_type = var_1354_pad_type_0, strides = var_1354_strides_0, weight = layers_6_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_25_cast_fp16)[name = string("op_1354_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> key_13_cast_fp16 = add(x = var_1348_cast_fp16, y = var_1354_cast_fp16)[name = string("key_13_cast_fp16")];
+            string var_1364_pad_type_0 = const()[name = string("op_1364_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1364_strides_0 = const()[name = string("op_1364_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1364_pad_0 = const()[name = string("op_1364_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1364_dilations_0 = const()[name = string("op_1364_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1364_groups_0 = const()[name = string("op_1364_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_6_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(35667776))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(35962752))))[name = string("layers_6_self_attn_v_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_6_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_6_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(35962880)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_1364_cast_fp16 = conv(bias = layers_6_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_1364_dilations_0, groups = var_1364_groups_0, pad = var_1364_pad_0, pad_type = var_1364_pad_type_0, strides = var_1364_strides_0, weight = layers_6_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_25_cast_fp16)[name = string("op_1364_cast_fp16")];
+            string var_1370_pad_type_0 = const()[name = string("op_1370_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1370_strides_0 = const()[name = string("op_1370_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1370_pad_0 = const()[name = string("op_1370_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1370_dilations_0 = const()[name = string("op_1370_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1370_groups_0 = const()[name = string("op_1370_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_6_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(35970496))), nonzero_data = tensor<fp16, [2961]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(35964480))))[name = string("layers_6_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1500]> var_1370_cast_fp16 = conv(dilations = var_1370_dilations_0, groups = var_1370_groups_0, pad = var_1370_pad_0, pad_type = var_1370_pad_type_0, strides = var_1370_strides_0, weight = layers_6_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_25_cast_fp16)[name = string("op_1370_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> value_13_cast_fp16 = add(x = var_1364_cast_fp16, y = var_1370_cast_fp16)[name = string("value_13_cast_fp16")];
+            tensor<int32, [4]> var_1373 = const()[name = string("op_1373"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1500]> mh_q_13_cast_fp16 = reshape(shape = var_1373, x = query_13_cast_fp16)[name = string("mh_q_13_cast_fp16")];
+            fp16 var_1375_to_fp16 = const()[name = string("op_1375_to_fp16"), val = fp16(0x1p-3)];
+            tensor<fp16, [1, 12, 64, 1500]> var_1376_cast_fp16 = mul(x = mh_q_13_cast_fp16, y = var_1375_to_fp16)[name = string("op_1376_cast_fp16")];
+            tensor<int32, [4]> var_1377 = const()[name = string("op_1377"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1500]> var_1378_cast_fp16 = reshape(shape = var_1377, x = key_13_cast_fp16)[name = string("op_1378_cast_fp16")];
+            bool mh_w_13_transpose_x_0 = const()[name = string("mh_w_13_transpose_x_0"), val = bool(true)];
+            bool mh_w_13_transpose_y_0 = const()[name = string("mh_w_13_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 12, 1500, 1500]> mh_w_13_cast_fp16 = matmul(transpose_x = mh_w_13_transpose_x_0, transpose_y = mh_w_13_transpose_y_0, x = var_1376_cast_fp16, y = var_1378_cast_fp16)[name = string("mh_w_13_cast_fp16")];
+            tensor<fp16, [1, 12, 1500, 1500]> var_1381_cast_fp16 = softmax(axis = var_1292, x = mh_w_13_cast_fp16)[name = string("op_1381_cast_fp16")];
+            tensor<int32, [4]> var_1382 = const()[name = string("op_1382"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1500]> var_1383_cast_fp16 = reshape(shape = var_1382, x = value_13_cast_fp16)[name = string("op_1383_cast_fp16")];
+            bool attn_13_transpose_x_0 = const()[name = string("attn_13_transpose_x_0"), val = bool(false)];
+            bool attn_13_transpose_y_0 = const()[name = string("attn_13_transpose_y_0"), val = bool(true)];
+            tensor<fp16, [1, 12, 64, 1500]> attn_13_cast_fp16 = matmul(transpose_x = attn_13_transpose_x_0, transpose_y = attn_13_transpose_y_0, x = var_1383_cast_fp16, y = var_1381_cast_fp16)[name = string("attn_13_cast_fp16")];
+            tensor<int32, [4]> var_1386 = const()[name = string("op_1386"), val = tensor<int32, [4]>([1, 768, 1, -1])];
+            tensor<fp16, [1, 768, 1, 1500]> input_49_cast_fp16 = reshape(shape = var_1386, x = attn_13_cast_fp16)[name = string("input_49_cast_fp16")];
+            string var_1396_pad_type_0 = const()[name = string("op_1396_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1396_strides_0 = const()[name = string("op_1396_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1396_pad_0 = const()[name = string("op_1396_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1396_dilations_0 = const()[name = string("op_1396_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1396_groups_0 = const()[name = string("op_1396_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_6_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(36044288))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(36339264))))[name = string("layers_6_self_attn_o_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_6_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_6_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(36339392)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_1396_cast_fp16 = conv(bias = layers_6_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_1396_dilations_0, groups = var_1396_groups_0, pad = var_1396_pad_0, pad_type = var_1396_pad_type_0, strides = var_1396_strides_0, weight = layers_6_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_49_cast_fp16)[name = string("op_1396_cast_fp16")];
+            string var_1402_pad_type_0 = const()[name = string("op_1402_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1402_strides_0 = const()[name = string("op_1402_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1402_pad_0 = const()[name = string("op_1402_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1402_dilations_0 = const()[name = string("op_1402_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1402_groups_0 = const()[name = string("op_1402_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_6_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(36346496))), nonzero_data = tensor<fp16, [2712]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(36340992))))[name = string("layers_6_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1500]> var_1402_cast_fp16 = conv(dilations = var_1402_dilations_0, groups = var_1402_groups_0, pad = var_1402_pad_0, pad_type = var_1402_pad_type_0, strides = var_1402_strides_0, weight = layers_6_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_49_cast_fp16)[name = string("op_1402_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> obj_27_cast_fp16 = add(x = var_1396_cast_fp16, y = var_1402_cast_fp16)[name = string("obj_27_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> inputs_27_cast_fp16 = add(x = inputs_25_cast_fp16, y = obj_27_cast_fp16)[name = string("inputs_27_cast_fp16")];
+            tensor<int32, [1]> out_27_axes_0 = const()[name = string("out_27_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_1413_to_fp16 = const()[name = string("op_1413_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1500]> out_27_cast_fp16 = layer_norm(axes = out_27_axes_0, epsilon = var_1413_to_fp16, x = inputs_27_cast_fp16)[name = string("out_27_cast_fp16")];
+            tensor<fp16, [768]> input_51_gamma_0_to_fp16 = const()[name = string("input_51_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(36420288)))];
+            tensor<fp16, [768]> input_51_beta_0_to_fp16 = const()[name = string("input_51_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(36421888)))];
+            fp16 input_51_epsilon_0_to_fp16 = const()[name = string("input_51_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1500]> input_51_cast_fp16 = batch_norm(beta = input_51_beta_0_to_fp16, epsilon = input_51_epsilon_0_to_fp16, gamma = input_51_gamma_0_to_fp16, mean = var_81_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_27_cast_fp16)[name = string("input_51_cast_fp16")];
+            string var_1431_pad_type_0 = const()[name = string("op_1431_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1431_strides_0 = const()[name = string("op_1431_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1431_pad_0 = const()[name = string("op_1431_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1431_dilations_0 = const()[name = string("op_1431_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1431_groups_0 = const()[name = string("op_1431_groups_0"), val = int32(1)];
+            tensor<fp16, [3072, 768, 1, 1]> layers_6_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [3072, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(36423488))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(37603200))))[name = string("layers_6_fc1_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [3072]> layers_6_fc1_inlier_module_bias_to_fp16 = const()[name = string("layers_6_fc1_inlier_module_bias_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(37603328)))];
+            tensor<fp16, [1, 3072, 1, 1500]> var_1431_cast_fp16 = conv(bias = layers_6_fc1_inlier_module_bias_to_fp16, dilations = var_1431_dilations_0, groups = var_1431_groups_0, pad = var_1431_pad_0, pad_type = var_1431_pad_type_0, strides = var_1431_strides_0, weight = layers_6_fc1_inlier_module_weight_to_fp16_palettized, x = input_51_cast_fp16)[name = string("op_1431_cast_fp16")];
+            string var_1437_pad_type_0 = const()[name = string("op_1437_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1437_strides_0 = const()[name = string("op_1437_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1437_pad_0 = const()[name = string("op_1437_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1437_dilations_0 = const()[name = string("op_1437_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1437_groups_0 = const()[name = string("op_1437_groups_0"), val = int32(1)];
+            tensor<fp16, [3072, 768, 1, 1]> layers_6_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [3072, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(37638976))), nonzero_data = tensor<fp16, [14658]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(37609536))))[name = string("layers_6_fc1_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 3072, 1, 1500]> var_1437_cast_fp16 = conv(dilations = var_1437_dilations_0, groups = var_1437_groups_0, pad = var_1437_pad_0, pad_type = var_1437_pad_type_0, strides = var_1437_strides_0, weight = layers_6_fc1_outlier_module_weight_to_fp16_sparsified, x = input_51_cast_fp16)[name = string("op_1437_cast_fp16")];
+            tensor<fp16, [1, 3072, 1, 1500]> input_53_cast_fp16 = add(x = var_1431_cast_fp16, y = var_1437_cast_fp16)[name = string("input_53_cast_fp16")];
+            string input_55_mode_0 = const()[name = string("input_55_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 3072, 1, 1500]> input_55_cast_fp16 = gelu(mode = input_55_mode_0, x = input_53_cast_fp16)[name = string("input_55_cast_fp16")];
+            string var_1448_pad_type_0 = const()[name = string("op_1448_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1448_strides_0 = const()[name = string("op_1448_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1448_pad_0 = const()[name = string("op_1448_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1448_dilations_0 = const()[name = string("op_1448_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1448_groups_0 = const()[name = string("op_1448_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 3072, 1, 1]> layers_6_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(37933952))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(39113664))))[name = string("layers_6_fc2_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_6_fc2_inlier_module_bias_to_fp16 = const()[name = string("layers_6_fc2_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(39113792)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_1448_cast_fp16 = conv(bias = layers_6_fc2_inlier_module_bias_to_fp16, dilations = var_1448_dilations_0, groups = var_1448_groups_0, pad = var_1448_pad_0, pad_type = var_1448_pad_type_0, strides = var_1448_strides_0, weight = layers_6_fc2_inlier_module_weight_to_fp16_palettized, x = input_55_cast_fp16)[name = string("op_1448_cast_fp16")];
+            string var_1454_pad_type_0 = const()[name = string("op_1454_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1454_strides_0 = const()[name = string("op_1454_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1454_pad_0 = const()[name = string("op_1454_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1454_dilations_0 = const()[name = string("op_1454_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1454_groups_0 = const()[name = string("op_1454_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 3072, 1, 1]> layers_6_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(39142720))), nonzero_data = tensor<fp16, [13601]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(39115392))))[name = string("layers_6_fc2_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1500]> var_1454_cast_fp16 = conv(dilations = var_1454_dilations_0, groups = var_1454_groups_0, pad = var_1454_pad_0, pad_type = var_1454_pad_type_0, strides = var_1454_strides_0, weight = layers_6_fc2_outlier_module_weight_to_fp16_sparsified, x = input_55_cast_fp16)[name = string("op_1454_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> hidden_states_17_cast_fp16 = add(x = var_1448_cast_fp16, y = var_1454_cast_fp16)[name = string("hidden_states_17_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> inputs_29_cast_fp16 = add(x = inputs_27_cast_fp16, y = hidden_states_17_cast_fp16)[name = string("inputs_29_cast_fp16")];
+            int32 var_1464 = const()[name = string("op_1464"), val = int32(3)];
+            tensor<int32, [1]> out_29_axes_0 = const()[name = string("out_29_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_1483_to_fp16 = const()[name = string("op_1483_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1500]> out_29_cast_fp16 = layer_norm(axes = out_29_axes_0, epsilon = var_1483_to_fp16, x = inputs_29_cast_fp16)[name = string("out_29_cast_fp16")];
+            tensor<fp16, [768]> obj_29_gamma_0_to_fp16 = const()[name = string("obj_29_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(39437696)))];
+            tensor<fp16, [768]> obj_29_beta_0_to_fp16 = const()[name = string("obj_29_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(39439296)))];
+            fp16 obj_29_epsilon_0_to_fp16 = const()[name = string("obj_29_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1500]> obj_29_cast_fp16 = batch_norm(beta = obj_29_beta_0_to_fp16, epsilon = obj_29_epsilon_0_to_fp16, gamma = obj_29_gamma_0_to_fp16, mean = var_81_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_29_cast_fp16)[name = string("obj_29_cast_fp16")];
+            string var_1505_pad_type_0 = const()[name = string("op_1505_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1505_strides_0 = const()[name = string("op_1505_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1505_pad_0 = const()[name = string("op_1505_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1505_dilations_0 = const()[name = string("op_1505_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1505_groups_0 = const()[name = string("op_1505_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_7_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(39440896))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(39735872))))[name = string("layers_7_self_attn_q_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_7_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_7_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(39736000)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_1505_cast_fp16 = conv(bias = layers_7_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_1505_dilations_0, groups = var_1505_groups_0, pad = var_1505_pad_0, pad_type = var_1505_pad_type_0, strides = var_1505_strides_0, weight = layers_7_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_29_cast_fp16)[name = string("op_1505_cast_fp16")];
+            string var_1511_pad_type_0 = const()[name = string("op_1511_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1511_strides_0 = const()[name = string("op_1511_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1511_pad_0 = const()[name = string("op_1511_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1511_dilations_0 = const()[name = string("op_1511_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1511_groups_0 = const()[name = string("op_1511_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_7_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(39745536))), nonzero_data = tensor<fp16, [3933]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(39737600))))[name = string("layers_7_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1500]> var_1511_cast_fp16 = conv(dilations = var_1511_dilations_0, groups = var_1511_groups_0, pad = var_1511_pad_0, pad_type = var_1511_pad_type_0, strides = var_1511_strides_0, weight = layers_7_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_29_cast_fp16)[name = string("op_1511_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> query_15_cast_fp16 = add(x = var_1505_cast_fp16, y = var_1511_cast_fp16)[name = string("query_15_cast_fp16")];
+            string var_1520_pad_type_0 = const()[name = string("op_1520_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1520_strides_0 = const()[name = string("op_1520_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1520_pad_0 = const()[name = string("op_1520_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1520_dilations_0 = const()[name = string("op_1520_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1520_groups_0 = const()[name = string("op_1520_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_7_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(39819328))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40114304))))[name = string("layers_7_self_attn_k_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1, 768, 1, 1500]> var_1520_cast_fp16 = conv(dilations = var_1520_dilations_0, groups = var_1520_groups_0, pad = var_1520_pad_0, pad_type = var_1520_pad_type_0, strides = var_1520_strides_0, weight = layers_7_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_29_cast_fp16)[name = string("op_1520_cast_fp16")];
+            string var_1526_pad_type_0 = const()[name = string("op_1526_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1526_strides_0 = const()[name = string("op_1526_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1526_pad_0 = const()[name = string("op_1526_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1526_dilations_0 = const()[name = string("op_1526_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1526_groups_0 = const()[name = string("op_1526_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_7_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40123840))), nonzero_data = tensor<fp16, [4657]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40114432))))[name = string("layers_7_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1500]> var_1526_cast_fp16 = conv(dilations = var_1526_dilations_0, groups = var_1526_groups_0, pad = var_1526_pad_0, pad_type = var_1526_pad_type_0, strides = var_1526_strides_0, weight = layers_7_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_29_cast_fp16)[name = string("op_1526_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> key_15_cast_fp16 = add(x = var_1520_cast_fp16, y = var_1526_cast_fp16)[name = string("key_15_cast_fp16")];
+            string var_1536_pad_type_0 = const()[name = string("op_1536_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1536_strides_0 = const()[name = string("op_1536_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1536_pad_0 = const()[name = string("op_1536_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1536_dilations_0 = const()[name = string("op_1536_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1536_groups_0 = const()[name = string("op_1536_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_7_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40197632))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40492608))))[name = string("layers_7_self_attn_v_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_7_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_7_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40492736)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_1536_cast_fp16 = conv(bias = layers_7_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_1536_dilations_0, groups = var_1536_groups_0, pad = var_1536_pad_0, pad_type = var_1536_pad_type_0, strides = var_1536_strides_0, weight = layers_7_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_29_cast_fp16)[name = string("op_1536_cast_fp16")];
+            string var_1542_pad_type_0 = const()[name = string("op_1542_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1542_strides_0 = const()[name = string("op_1542_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1542_pad_0 = const()[name = string("op_1542_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1542_dilations_0 = const()[name = string("op_1542_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1542_groups_0 = const()[name = string("op_1542_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_7_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40503360))), nonzero_data = tensor<fp16, [4463]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40494336))))[name = string("layers_7_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1500]> var_1542_cast_fp16 = conv(dilations = var_1542_dilations_0, groups = var_1542_groups_0, pad = var_1542_pad_0, pad_type = var_1542_pad_type_0, strides = var_1542_strides_0, weight = layers_7_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_29_cast_fp16)[name = string("op_1542_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> value_15_cast_fp16 = add(x = var_1536_cast_fp16, y = var_1542_cast_fp16)[name = string("value_15_cast_fp16")];
+            tensor<int32, [4]> var_1545 = const()[name = string("op_1545"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1500]> mh_q_15_cast_fp16 = reshape(shape = var_1545, x = query_15_cast_fp16)[name = string("mh_q_15_cast_fp16")];
+            fp16 var_1547_to_fp16 = const()[name = string("op_1547_to_fp16"), val = fp16(0x1p-3)];
+            tensor<fp16, [1, 12, 64, 1500]> var_1548_cast_fp16 = mul(x = mh_q_15_cast_fp16, y = var_1547_to_fp16)[name = string("op_1548_cast_fp16")];
+            tensor<int32, [4]> var_1549 = const()[name = string("op_1549"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1500]> var_1550_cast_fp16 = reshape(shape = var_1549, x = key_15_cast_fp16)[name = string("op_1550_cast_fp16")];
+            bool mh_w_15_transpose_x_0 = const()[name = string("mh_w_15_transpose_x_0"), val = bool(true)];
+            bool mh_w_15_transpose_y_0 = const()[name = string("mh_w_15_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 12, 1500, 1500]> mh_w_15_cast_fp16 = matmul(transpose_x = mh_w_15_transpose_x_0, transpose_y = mh_w_15_transpose_y_0, x = var_1548_cast_fp16, y = var_1550_cast_fp16)[name = string("mh_w_15_cast_fp16")];
+            tensor<fp16, [1, 12, 1500, 1500]> var_1553_cast_fp16 = softmax(axis = var_1464, x = mh_w_15_cast_fp16)[name = string("op_1553_cast_fp16")];
+            tensor<int32, [4]> var_1554 = const()[name = string("op_1554"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1500]> var_1555_cast_fp16 = reshape(shape = var_1554, x = value_15_cast_fp16)[name = string("op_1555_cast_fp16")];
+            bool attn_15_transpose_x_0 = const()[name = string("attn_15_transpose_x_0"), val = bool(false)];
+            bool attn_15_transpose_y_0 = const()[name = string("attn_15_transpose_y_0"), val = bool(true)];
+            tensor<fp16, [1, 12, 64, 1500]> attn_15_cast_fp16 = matmul(transpose_x = attn_15_transpose_x_0, transpose_y = attn_15_transpose_y_0, x = var_1555_cast_fp16, y = var_1553_cast_fp16)[name = string("attn_15_cast_fp16")];
+            tensor<int32, [4]> var_1558 = const()[name = string("op_1558"), val = tensor<int32, [4]>([1, 768, 1, -1])];
+            tensor<fp16, [1, 768, 1, 1500]> input_57_cast_fp16 = reshape(shape = var_1558, x = attn_15_cast_fp16)[name = string("input_57_cast_fp16")];
+            string var_1568_pad_type_0 = const()[name = string("op_1568_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1568_strides_0 = const()[name = string("op_1568_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1568_pad_0 = const()[name = string("op_1568_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1568_dilations_0 = const()[name = string("op_1568_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1568_groups_0 = const()[name = string("op_1568_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_7_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40577152))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40872128))))[name = string("layers_7_self_attn_o_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_7_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_7_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40872256)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_1568_cast_fp16 = conv(bias = layers_7_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_1568_dilations_0, groups = var_1568_groups_0, pad = var_1568_pad_0, pad_type = var_1568_pad_type_0, strides = var_1568_strides_0, weight = layers_7_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_57_cast_fp16)[name = string("op_1568_cast_fp16")];
+            string var_1574_pad_type_0 = const()[name = string("op_1574_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1574_strides_0 = const()[name = string("op_1574_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1574_pad_0 = const()[name = string("op_1574_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1574_dilations_0 = const()[name = string("op_1574_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1574_groups_0 = const()[name = string("op_1574_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_7_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40885568))), nonzero_data = tensor<fp16, [5795]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40873856))))[name = string("layers_7_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1500]> var_1574_cast_fp16 = conv(dilations = var_1574_dilations_0, groups = var_1574_groups_0, pad = var_1574_pad_0, pad_type = var_1574_pad_type_0, strides = var_1574_strides_0, weight = layers_7_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_57_cast_fp16)[name = string("op_1574_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> obj_31_cast_fp16 = add(x = var_1568_cast_fp16, y = var_1574_cast_fp16)[name = string("obj_31_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> inputs_31_cast_fp16 = add(x = inputs_29_cast_fp16, y = obj_31_cast_fp16)[name = string("inputs_31_cast_fp16")];
+            tensor<int32, [1]> out_31_axes_0 = const()[name = string("out_31_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_1585_to_fp16 = const()[name = string("op_1585_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1500]> out_31_cast_fp16 = layer_norm(axes = out_31_axes_0, epsilon = var_1585_to_fp16, x = inputs_31_cast_fp16)[name = string("out_31_cast_fp16")];
+            tensor<fp16, [768]> input_59_gamma_0_to_fp16 = const()[name = string("input_59_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40959360)))];
+            tensor<fp16, [768]> input_59_beta_0_to_fp16 = const()[name = string("input_59_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40960960)))];
+            fp16 input_59_epsilon_0_to_fp16 = const()[name = string("input_59_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1500]> input_59_cast_fp16 = batch_norm(beta = input_59_beta_0_to_fp16, epsilon = input_59_epsilon_0_to_fp16, gamma = input_59_gamma_0_to_fp16, mean = var_81_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_31_cast_fp16)[name = string("input_59_cast_fp16")];
+            string var_1603_pad_type_0 = const()[name = string("op_1603_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1603_strides_0 = const()[name = string("op_1603_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1603_pad_0 = const()[name = string("op_1603_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1603_dilations_0 = const()[name = string("op_1603_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1603_groups_0 = const()[name = string("op_1603_groups_0"), val = int32(1)];
+            tensor<fp16, [3072, 768, 1, 1]> layers_7_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [3072, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40962560))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(42142272))))[name = string("layers_7_fc1_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [3072]> layers_7_fc1_inlier_module_bias_to_fp16 = const()[name = string("layers_7_fc1_inlier_module_bias_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(42142400)))];
+            tensor<fp16, [1, 3072, 1, 1500]> var_1603_cast_fp16 = conv(bias = layers_7_fc1_inlier_module_bias_to_fp16, dilations = var_1603_dilations_0, groups = var_1603_groups_0, pad = var_1603_pad_0, pad_type = var_1603_pad_type_0, strides = var_1603_strides_0, weight = layers_7_fc1_inlier_module_weight_to_fp16_palettized, x = input_59_cast_fp16)[name = string("op_1603_cast_fp16")];
+            string var_1609_pad_type_0 = const()[name = string("op_1609_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1609_strides_0 = const()[name = string("op_1609_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1609_pad_0 = const()[name = string("op_1609_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1609_dilations_0 = const()[name = string("op_1609_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1609_groups_0 = const()[name = string("op_1609_groups_0"), val = int32(1)];
+            tensor<fp16, [3072, 768, 1, 1]> layers_7_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [3072, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(42172224))), nonzero_data = tensor<fp16, [11748]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(42148608))))[name = string("layers_7_fc1_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 3072, 1, 1500]> var_1609_cast_fp16 = conv(dilations = var_1609_dilations_0, groups = var_1609_groups_0, pad = var_1609_pad_0, pad_type = var_1609_pad_type_0, strides = var_1609_strides_0, weight = layers_7_fc1_outlier_module_weight_to_fp16_sparsified, x = input_59_cast_fp16)[name = string("op_1609_cast_fp16")];
+            tensor<fp16, [1, 3072, 1, 1500]> input_61_cast_fp16 = add(x = var_1603_cast_fp16, y = var_1609_cast_fp16)[name = string("input_61_cast_fp16")];
+            string input_63_mode_0 = const()[name = string("input_63_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 3072, 1, 1500]> input_63_cast_fp16 = gelu(mode = input_63_mode_0, x = input_61_cast_fp16)[name = string("input_63_cast_fp16")];
+            string var_1620_pad_type_0 = const()[name = string("op_1620_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1620_strides_0 = const()[name = string("op_1620_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1620_pad_0 = const()[name = string("op_1620_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1620_dilations_0 = const()[name = string("op_1620_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1620_groups_0 = const()[name = string("op_1620_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 3072, 1, 1]> layers_7_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(42467200))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(43646912))))[name = string("layers_7_fc2_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_7_fc2_inlier_module_bias_to_fp16 = const()[name = string("layers_7_fc2_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(43647040)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_1620_cast_fp16 = conv(bias = layers_7_fc2_inlier_module_bias_to_fp16, dilations = var_1620_dilations_0, groups = var_1620_groups_0, pad = var_1620_pad_0, pad_type = var_1620_pad_type_0, strides = var_1620_strides_0, weight = layers_7_fc2_inlier_module_weight_to_fp16_palettized, x = input_63_cast_fp16)[name = string("op_1620_cast_fp16")];
+            string var_1626_pad_type_0 = const()[name = string("op_1626_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1626_strides_0 = const()[name = string("op_1626_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1626_pad_0 = const()[name = string("op_1626_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1626_dilations_0 = const()[name = string("op_1626_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1626_groups_0 = const()[name = string("op_1626_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 3072, 1, 1]> layers_7_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(43671552))), nonzero_data = tensor<fp16, [11417]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(43648640))))[name = string("layers_7_fc2_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1500]> var_1626_cast_fp16 = conv(dilations = var_1626_dilations_0, groups = var_1626_groups_0, pad = var_1626_pad_0, pad_type = var_1626_pad_type_0, strides = var_1626_strides_0, weight = layers_7_fc2_outlier_module_weight_to_fp16_sparsified, x = input_63_cast_fp16)[name = string("op_1626_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> hidden_states_19_cast_fp16 = add(x = var_1620_cast_fp16, y = var_1626_cast_fp16)[name = string("hidden_states_19_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> inputs_33_cast_fp16 = add(x = inputs_31_cast_fp16, y = hidden_states_19_cast_fp16)[name = string("inputs_33_cast_fp16")];
+            int32 var_1636 = const()[name = string("op_1636"), val = int32(3)];
+            tensor<int32, [1]> out_33_axes_0 = const()[name = string("out_33_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_1655_to_fp16 = const()[name = string("op_1655_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1500]> out_33_cast_fp16 = layer_norm(axes = out_33_axes_0, epsilon = var_1655_to_fp16, x = inputs_33_cast_fp16)[name = string("out_33_cast_fp16")];
+            tensor<fp16, [768]> obj_33_gamma_0_to_fp16 = const()[name = string("obj_33_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(43966528)))];
+            tensor<fp16, [768]> obj_33_beta_0_to_fp16 = const()[name = string("obj_33_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(43968128)))];
+            fp16 obj_33_epsilon_0_to_fp16 = const()[name = string("obj_33_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1500]> obj_33_cast_fp16 = batch_norm(beta = obj_33_beta_0_to_fp16, epsilon = obj_33_epsilon_0_to_fp16, gamma = obj_33_gamma_0_to_fp16, mean = var_81_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_33_cast_fp16)[name = string("obj_33_cast_fp16")];
+            string var_1677_pad_type_0 = const()[name = string("op_1677_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1677_strides_0 = const()[name = string("op_1677_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1677_pad_0 = const()[name = string("op_1677_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1677_dilations_0 = const()[name = string("op_1677_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1677_groups_0 = const()[name = string("op_1677_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_8_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(43969728))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(44264704))))[name = string("layers_8_self_attn_q_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_8_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_8_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(44264832)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_1677_cast_fp16 = conv(bias = layers_8_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_1677_dilations_0, groups = var_1677_groups_0, pad = var_1677_pad_0, pad_type = var_1677_pad_type_0, strides = var_1677_strides_0, weight = layers_8_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_33_cast_fp16)[name = string("op_1677_cast_fp16")];
+            string var_1683_pad_type_0 = const()[name = string("op_1683_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1683_strides_0 = const()[name = string("op_1683_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1683_pad_0 = const()[name = string("op_1683_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1683_dilations_0 = const()[name = string("op_1683_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1683_groups_0 = const()[name = string("op_1683_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_8_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(44272704))), nonzero_data = tensor<fp16, [3098]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(44266432))))[name = string("layers_8_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1500]> var_1683_cast_fp16 = conv(dilations = var_1683_dilations_0, groups = var_1683_groups_0, pad = var_1683_pad_0, pad_type = var_1683_pad_type_0, strides = var_1683_strides_0, weight = layers_8_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_33_cast_fp16)[name = string("op_1683_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> query_17_cast_fp16 = add(x = var_1677_cast_fp16, y = var_1683_cast_fp16)[name = string("query_17_cast_fp16")];
+            string var_1692_pad_type_0 = const()[name = string("op_1692_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1692_strides_0 = const()[name = string("op_1692_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1692_pad_0 = const()[name = string("op_1692_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1692_dilations_0 = const()[name = string("op_1692_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1692_groups_0 = const()[name = string("op_1692_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_8_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(44346496))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(44641472))))[name = string("layers_8_self_attn_k_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1, 768, 1, 1500]> var_1692_cast_fp16 = conv(dilations = var_1692_dilations_0, groups = var_1692_groups_0, pad = var_1692_pad_0, pad_type = var_1692_pad_type_0, strides = var_1692_strides_0, weight = layers_8_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_33_cast_fp16)[name = string("op_1692_cast_fp16")];
+            string var_1698_pad_type_0 = const()[name = string("op_1698_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1698_strides_0 = const()[name = string("op_1698_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1698_pad_0 = const()[name = string("op_1698_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1698_dilations_0 = const()[name = string("op_1698_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1698_groups_0 = const()[name = string("op_1698_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_8_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(44648384))), nonzero_data = tensor<fp16, [3329]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(44641600))))[name = string("layers_8_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1500]> var_1698_cast_fp16 = conv(dilations = var_1698_dilations_0, groups = var_1698_groups_0, pad = var_1698_pad_0, pad_type = var_1698_pad_type_0, strides = var_1698_strides_0, weight = layers_8_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_33_cast_fp16)[name = string("op_1698_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> key_17_cast_fp16 = add(x = var_1692_cast_fp16, y = var_1698_cast_fp16)[name = string("key_17_cast_fp16")];
+            string var_1708_pad_type_0 = const()[name = string("op_1708_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1708_strides_0 = const()[name = string("op_1708_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1708_pad_0 = const()[name = string("op_1708_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1708_dilations_0 = const()[name = string("op_1708_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1708_groups_0 = const()[name = string("op_1708_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_8_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(44722176))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(45017152))))[name = string("layers_8_self_attn_v_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_8_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_8_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(45017280)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_1708_cast_fp16 = conv(bias = layers_8_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_1708_dilations_0, groups = var_1708_groups_0, pad = var_1708_pad_0, pad_type = var_1708_pad_type_0, strides = var_1708_strides_0, weight = layers_8_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_33_cast_fp16)[name = string("op_1708_cast_fp16")];
+            string var_1714_pad_type_0 = const()[name = string("op_1714_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1714_strides_0 = const()[name = string("op_1714_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1714_pad_0 = const()[name = string("op_1714_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1714_dilations_0 = const()[name = string("op_1714_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1714_groups_0 = const()[name = string("op_1714_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_8_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(45025152))), nonzero_data = tensor<fp16, [3094]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(45018880))))[name = string("layers_8_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1500]> var_1714_cast_fp16 = conv(dilations = var_1714_dilations_0, groups = var_1714_groups_0, pad = var_1714_pad_0, pad_type = var_1714_pad_type_0, strides = var_1714_strides_0, weight = layers_8_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_33_cast_fp16)[name = string("op_1714_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> value_17_cast_fp16 = add(x = var_1708_cast_fp16, y = var_1714_cast_fp16)[name = string("value_17_cast_fp16")];
+            tensor<int32, [4]> var_1717 = const()[name = string("op_1717"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1500]> mh_q_17_cast_fp16 = reshape(shape = var_1717, x = query_17_cast_fp16)[name = string("mh_q_17_cast_fp16")];
+            fp16 var_1719_to_fp16 = const()[name = string("op_1719_to_fp16"), val = fp16(0x1p-3)];
+            tensor<fp16, [1, 12, 64, 1500]> var_1720_cast_fp16 = mul(x = mh_q_17_cast_fp16, y = var_1719_to_fp16)[name = string("op_1720_cast_fp16")];
+            tensor<int32, [4]> var_1721 = const()[name = string("op_1721"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1500]> var_1722_cast_fp16 = reshape(shape = var_1721, x = key_17_cast_fp16)[name = string("op_1722_cast_fp16")];
+            bool mh_w_17_transpose_x_0 = const()[name = string("mh_w_17_transpose_x_0"), val = bool(true)];
+            bool mh_w_17_transpose_y_0 = const()[name = string("mh_w_17_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 12, 1500, 1500]> mh_w_17_cast_fp16 = matmul(transpose_x = mh_w_17_transpose_x_0, transpose_y = mh_w_17_transpose_y_0, x = var_1720_cast_fp16, y = var_1722_cast_fp16)[name = string("mh_w_17_cast_fp16")];
+            tensor<fp16, [1, 12, 1500, 1500]> var_1725_cast_fp16 = softmax(axis = var_1636, x = mh_w_17_cast_fp16)[name = string("op_1725_cast_fp16")];
+            tensor<int32, [4]> var_1726 = const()[name = string("op_1726"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1500]> var_1727_cast_fp16 = reshape(shape = var_1726, x = value_17_cast_fp16)[name = string("op_1727_cast_fp16")];
+            bool attn_17_transpose_x_0 = const()[name = string("attn_17_transpose_x_0"), val = bool(false)];
+            bool attn_17_transpose_y_0 = const()[name = string("attn_17_transpose_y_0"), val = bool(true)];
+            tensor<fp16, [1, 12, 64, 1500]> attn_17_cast_fp16 = matmul(transpose_x = attn_17_transpose_x_0, transpose_y = attn_17_transpose_y_0, x = var_1727_cast_fp16, y = var_1725_cast_fp16)[name = string("attn_17_cast_fp16")];
+            tensor<int32, [4]> var_1730 = const()[name = string("op_1730"), val = tensor<int32, [4]>([1, 768, 1, -1])];
+            tensor<fp16, [1, 768, 1, 1500]> input_65_cast_fp16 = reshape(shape = var_1730, x = attn_17_cast_fp16)[name = string("input_65_cast_fp16")];
+            string var_1740_pad_type_0 = const()[name = string("op_1740_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1740_strides_0 = const()[name = string("op_1740_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1740_pad_0 = const()[name = string("op_1740_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1740_dilations_0 = const()[name = string("op_1740_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1740_groups_0 = const()[name = string("op_1740_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_8_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(45098944))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(45393920))))[name = string("layers_8_self_attn_o_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_8_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_8_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(45394048)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_1740_cast_fp16 = conv(bias = layers_8_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_1740_dilations_0, groups = var_1740_groups_0, pad = var_1740_pad_0, pad_type = var_1740_pad_type_0, strides = var_1740_strides_0, weight = layers_8_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_65_cast_fp16)[name = string("op_1740_cast_fp16")];
+            string var_1746_pad_type_0 = const()[name = string("op_1746_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1746_strides_0 = const()[name = string("op_1746_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1746_pad_0 = const()[name = string("op_1746_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1746_dilations_0 = const()[name = string("op_1746_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1746_groups_0 = const()[name = string("op_1746_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_8_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(45402880))), nonzero_data = tensor<fp16, [3581]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(45395648))))[name = string("layers_8_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1500]> var_1746_cast_fp16 = conv(dilations = var_1746_dilations_0, groups = var_1746_groups_0, pad = var_1746_pad_0, pad_type = var_1746_pad_type_0, strides = var_1746_strides_0, weight = layers_8_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_65_cast_fp16)[name = string("op_1746_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> obj_35_cast_fp16 = add(x = var_1740_cast_fp16, y = var_1746_cast_fp16)[name = string("obj_35_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> inputs_35_cast_fp16 = add(x = inputs_33_cast_fp16, y = obj_35_cast_fp16)[name = string("inputs_35_cast_fp16")];
+            tensor<int32, [1]> out_35_axes_0 = const()[name = string("out_35_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_1757_to_fp16 = const()[name = string("op_1757_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1500]> out_35_cast_fp16 = layer_norm(axes = out_35_axes_0, epsilon = var_1757_to_fp16, x = inputs_35_cast_fp16)[name = string("out_35_cast_fp16")];
+            tensor<fp16, [768]> input_67_gamma_0_to_fp16 = const()[name = string("input_67_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(45476672)))];
+            tensor<fp16, [768]> input_67_beta_0_to_fp16 = const()[name = string("input_67_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(45478272)))];
+            fp16 input_67_epsilon_0_to_fp16 = const()[name = string("input_67_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1500]> input_67_cast_fp16 = batch_norm(beta = input_67_beta_0_to_fp16, epsilon = input_67_epsilon_0_to_fp16, gamma = input_67_gamma_0_to_fp16, mean = var_81_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_35_cast_fp16)[name = string("input_67_cast_fp16")];
+            string var_1775_pad_type_0 = const()[name = string("op_1775_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1775_strides_0 = const()[name = string("op_1775_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1775_pad_0 = const()[name = string("op_1775_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1775_dilations_0 = const()[name = string("op_1775_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1775_groups_0 = const()[name = string("op_1775_groups_0"), val = int32(1)];
+            tensor<fp16, [3072, 768, 1, 1]> layers_8_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [3072, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(45479872))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(46659584))))[name = string("layers_8_fc1_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [3072]> layers_8_fc1_inlier_module_bias_to_fp16 = const()[name = string("layers_8_fc1_inlier_module_bias_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(46659712)))];
+            tensor<fp16, [1, 3072, 1, 1500]> var_1775_cast_fp16 = conv(bias = layers_8_fc1_inlier_module_bias_to_fp16, dilations = var_1775_dilations_0, groups = var_1775_groups_0, pad = var_1775_pad_0, pad_type = var_1775_pad_type_0, strides = var_1775_strides_0, weight = layers_8_fc1_inlier_module_weight_to_fp16_palettized, x = input_67_cast_fp16)[name = string("op_1775_cast_fp16")];
+            string var_1781_pad_type_0 = const()[name = string("op_1781_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1781_strides_0 = const()[name = string("op_1781_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1781_pad_0 = const()[name = string("op_1781_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1781_dilations_0 = const()[name = string("op_1781_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1781_groups_0 = const()[name = string("op_1781_groups_0"), val = int32(1)];
+            tensor<fp16, [3072, 768, 1, 1]> layers_8_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [3072, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(46692096))), nonzero_data = tensor<fp16, [13025]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(46665920))))[name = string("layers_8_fc1_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 3072, 1, 1500]> var_1781_cast_fp16 = conv(dilations = var_1781_dilations_0, groups = var_1781_groups_0, pad = var_1781_pad_0, pad_type = var_1781_pad_type_0, strides = var_1781_strides_0, weight = layers_8_fc1_outlier_module_weight_to_fp16_sparsified, x = input_67_cast_fp16)[name = string("op_1781_cast_fp16")];
+            tensor<fp16, [1, 3072, 1, 1500]> input_69_cast_fp16 = add(x = var_1775_cast_fp16, y = var_1781_cast_fp16)[name = string("input_69_cast_fp16")];
+            string input_71_mode_0 = const()[name = string("input_71_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 3072, 1, 1500]> input_71_cast_fp16 = gelu(mode = input_71_mode_0, x = input_69_cast_fp16)[name = string("input_71_cast_fp16")];
+            string var_1792_pad_type_0 = const()[name = string("op_1792_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1792_strides_0 = const()[name = string("op_1792_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1792_pad_0 = const()[name = string("op_1792_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1792_dilations_0 = const()[name = string("op_1792_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1792_groups_0 = const()[name = string("op_1792_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 3072, 1, 1]> layers_8_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(46987072))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(48166784))))[name = string("layers_8_fc2_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_8_fc2_inlier_module_bias_to_fp16 = const()[name = string("layers_8_fc2_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(48166912)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_1792_cast_fp16 = conv(bias = layers_8_fc2_inlier_module_bias_to_fp16, dilations = var_1792_dilations_0, groups = var_1792_groups_0, pad = var_1792_pad_0, pad_type = var_1792_pad_type_0, strides = var_1792_strides_0, weight = layers_8_fc2_inlier_module_weight_to_fp16_palettized, x = input_71_cast_fp16)[name = string("op_1792_cast_fp16")];
+            string var_1798_pad_type_0 = const()[name = string("op_1798_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1798_strides_0 = const()[name = string("op_1798_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1798_pad_0 = const()[name = string("op_1798_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1798_dilations_0 = const()[name = string("op_1798_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1798_groups_0 = const()[name = string("op_1798_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 3072, 1, 1]> layers_8_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(48189952))), nonzero_data = tensor<fp16, [10681]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(48168512))))[name = string("layers_8_fc2_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1500]> var_1798_cast_fp16 = conv(dilations = var_1798_dilations_0, groups = var_1798_groups_0, pad = var_1798_pad_0, pad_type = var_1798_pad_type_0, strides = var_1798_strides_0, weight = layers_8_fc2_outlier_module_weight_to_fp16_sparsified, x = input_71_cast_fp16)[name = string("op_1798_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> hidden_states_21_cast_fp16 = add(x = var_1792_cast_fp16, y = var_1798_cast_fp16)[name = string("hidden_states_21_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> inputs_37_cast_fp16 = add(x = inputs_35_cast_fp16, y = hidden_states_21_cast_fp16)[name = string("inputs_37_cast_fp16")];
+            int32 var_1808 = const()[name = string("op_1808"), val = int32(3)];
+            tensor<int32, [1]> out_37_axes_0 = const()[name = string("out_37_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_1827_to_fp16 = const()[name = string("op_1827_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1500]> out_37_cast_fp16 = layer_norm(axes = out_37_axes_0, epsilon = var_1827_to_fp16, x = inputs_37_cast_fp16)[name = string("out_37_cast_fp16")];
+            tensor<fp16, [768]> obj_37_gamma_0_to_fp16 = const()[name = string("obj_37_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(48484928)))];
+            tensor<fp16, [768]> obj_37_beta_0_to_fp16 = const()[name = string("obj_37_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(48486528)))];
+            fp16 obj_37_epsilon_0_to_fp16 = const()[name = string("obj_37_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1500]> obj_37_cast_fp16 = batch_norm(beta = obj_37_beta_0_to_fp16, epsilon = obj_37_epsilon_0_to_fp16, gamma = obj_37_gamma_0_to_fp16, mean = var_81_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_37_cast_fp16)[name = string("obj_37_cast_fp16")];
+            string var_1849_pad_type_0 = const()[name = string("op_1849_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1849_strides_0 = const()[name = string("op_1849_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1849_pad_0 = const()[name = string("op_1849_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1849_dilations_0 = const()[name = string("op_1849_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1849_groups_0 = const()[name = string("op_1849_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_9_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(48488128))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(48783104))))[name = string("layers_9_self_attn_q_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_9_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_9_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(48783232)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_1849_cast_fp16 = conv(bias = layers_9_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_1849_dilations_0, groups = var_1849_groups_0, pad = var_1849_pad_0, pad_type = var_1849_pad_type_0, strides = var_1849_strides_0, weight = layers_9_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_37_cast_fp16)[name = string("op_1849_cast_fp16")];
+            string var_1855_pad_type_0 = const()[name = string("op_1855_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1855_strides_0 = const()[name = string("op_1855_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1855_pad_0 = const()[name = string("op_1855_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1855_dilations_0 = const()[name = string("op_1855_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1855_groups_0 = const()[name = string("op_1855_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_9_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(48790784))), nonzero_data = tensor<fp16, [2925]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(48784832))))[name = string("layers_9_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1500]> var_1855_cast_fp16 = conv(dilations = var_1855_dilations_0, groups = var_1855_groups_0, pad = var_1855_pad_0, pad_type = var_1855_pad_type_0, strides = var_1855_strides_0, weight = layers_9_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_37_cast_fp16)[name = string("op_1855_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> query_19_cast_fp16 = add(x = var_1849_cast_fp16, y = var_1855_cast_fp16)[name = string("query_19_cast_fp16")];
+            string var_1864_pad_type_0 = const()[name = string("op_1864_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1864_strides_0 = const()[name = string("op_1864_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1864_pad_0 = const()[name = string("op_1864_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1864_dilations_0 = const()[name = string("op_1864_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1864_groups_0 = const()[name = string("op_1864_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_9_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(48864576))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(49159552))))[name = string("layers_9_self_attn_k_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1, 768, 1, 1500]> var_1864_cast_fp16 = conv(dilations = var_1864_dilations_0, groups = var_1864_groups_0, pad = var_1864_pad_0, pad_type = var_1864_pad_type_0, strides = var_1864_strides_0, weight = layers_9_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_37_cast_fp16)[name = string("op_1864_cast_fp16")];
+            string var_1870_pad_type_0 = const()[name = string("op_1870_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1870_strides_0 = const()[name = string("op_1870_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1870_pad_0 = const()[name = string("op_1870_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1870_dilations_0 = const()[name = string("op_1870_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1870_groups_0 = const()[name = string("op_1870_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_9_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(49166528))), nonzero_data = tensor<fp16, [3376]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(49159680))))[name = string("layers_9_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1500]> var_1870_cast_fp16 = conv(dilations = var_1870_dilations_0, groups = var_1870_groups_0, pad = var_1870_pad_0, pad_type = var_1870_pad_type_0, strides = var_1870_strides_0, weight = layers_9_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_37_cast_fp16)[name = string("op_1870_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> key_19_cast_fp16 = add(x = var_1864_cast_fp16, y = var_1870_cast_fp16)[name = string("key_19_cast_fp16")];
+            string var_1880_pad_type_0 = const()[name = string("op_1880_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1880_strides_0 = const()[name = string("op_1880_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1880_pad_0 = const()[name = string("op_1880_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1880_dilations_0 = const()[name = string("op_1880_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1880_groups_0 = const()[name = string("op_1880_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_9_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(49240320))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(49535296))))[name = string("layers_9_self_attn_v_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_9_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_9_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(49535424)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_1880_cast_fp16 = conv(bias = layers_9_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_1880_dilations_0, groups = var_1880_groups_0, pad = var_1880_pad_0, pad_type = var_1880_pad_type_0, strides = var_1880_strides_0, weight = layers_9_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_37_cast_fp16)[name = string("op_1880_cast_fp16")];
+            string var_1886_pad_type_0 = const()[name = string("op_1886_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1886_strides_0 = const()[name = string("op_1886_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1886_pad_0 = const()[name = string("op_1886_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1886_dilations_0 = const()[name = string("op_1886_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1886_groups_0 = const()[name = string("op_1886_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_9_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(49541568))), nonzero_data = tensor<fp16, [2221]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(49537024))))[name = string("layers_9_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1500]> var_1886_cast_fp16 = conv(dilations = var_1886_dilations_0, groups = var_1886_groups_0, pad = var_1886_pad_0, pad_type = var_1886_pad_type_0, strides = var_1886_strides_0, weight = layers_9_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_37_cast_fp16)[name = string("op_1886_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> value_19_cast_fp16 = add(x = var_1880_cast_fp16, y = var_1886_cast_fp16)[name = string("value_19_cast_fp16")];
+            tensor<int32, [4]> var_1889 = const()[name = string("op_1889"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1500]> mh_q_19_cast_fp16 = reshape(shape = var_1889, x = query_19_cast_fp16)[name = string("mh_q_19_cast_fp16")];
+            fp16 var_1891_to_fp16 = const()[name = string("op_1891_to_fp16"), val = fp16(0x1p-3)];
+            tensor<fp16, [1, 12, 64, 1500]> var_1892_cast_fp16 = mul(x = mh_q_19_cast_fp16, y = var_1891_to_fp16)[name = string("op_1892_cast_fp16")];
+            tensor<int32, [4]> var_1893 = const()[name = string("op_1893"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1500]> var_1894_cast_fp16 = reshape(shape = var_1893, x = key_19_cast_fp16)[name = string("op_1894_cast_fp16")];
+            bool mh_w_19_transpose_x_0 = const()[name = string("mh_w_19_transpose_x_0"), val = bool(true)];
+            bool mh_w_19_transpose_y_0 = const()[name = string("mh_w_19_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 12, 1500, 1500]> mh_w_19_cast_fp16 = matmul(transpose_x = mh_w_19_transpose_x_0, transpose_y = mh_w_19_transpose_y_0, x = var_1892_cast_fp16, y = var_1894_cast_fp16)[name = string("mh_w_19_cast_fp16")];
+            tensor<fp16, [1, 12, 1500, 1500]> var_1897_cast_fp16 = softmax(axis = var_1808, x = mh_w_19_cast_fp16)[name = string("op_1897_cast_fp16")];
+            tensor<int32, [4]> var_1898 = const()[name = string("op_1898"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1500]> var_1899_cast_fp16 = reshape(shape = var_1898, x = value_19_cast_fp16)[name = string("op_1899_cast_fp16")];
+            bool attn_19_transpose_x_0 = const()[name = string("attn_19_transpose_x_0"), val = bool(false)];
+            bool attn_19_transpose_y_0 = const()[name = string("attn_19_transpose_y_0"), val = bool(true)];
+            tensor<fp16, [1, 12, 64, 1500]> attn_19_cast_fp16 = matmul(transpose_x = attn_19_transpose_x_0, transpose_y = attn_19_transpose_y_0, x = var_1899_cast_fp16, y = var_1897_cast_fp16)[name = string("attn_19_cast_fp16")];
+            tensor<int32, [4]> var_1902 = const()[name = string("op_1902"), val = tensor<int32, [4]>([1, 768, 1, -1])];
+            tensor<fp16, [1, 768, 1, 1500]> input_73_cast_fp16 = reshape(shape = var_1902, x = attn_19_cast_fp16)[name = string("input_73_cast_fp16")];
+            string var_1912_pad_type_0 = const()[name = string("op_1912_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1912_strides_0 = const()[name = string("op_1912_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1912_pad_0 = const()[name = string("op_1912_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1912_dilations_0 = const()[name = string("op_1912_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1912_groups_0 = const()[name = string("op_1912_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_9_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(49615360))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(49910336))))[name = string("layers_9_self_attn_o_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_9_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_9_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(49910464)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_1912_cast_fp16 = conv(bias = layers_9_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_1912_dilations_0, groups = var_1912_groups_0, pad = var_1912_pad_0, pad_type = var_1912_pad_type_0, strides = var_1912_strides_0, weight = layers_9_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_73_cast_fp16)[name = string("op_1912_cast_fp16")];
+            string var_1918_pad_type_0 = const()[name = string("op_1918_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1918_strides_0 = const()[name = string("op_1918_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1918_pad_0 = const()[name = string("op_1918_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1918_dilations_0 = const()[name = string("op_1918_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1918_groups_0 = const()[name = string("op_1918_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_9_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(49916928))), nonzero_data = tensor<fp16, [2375]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(49912064))))[name = string("layers_9_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1500]> var_1918_cast_fp16 = conv(dilations = var_1918_dilations_0, groups = var_1918_groups_0, pad = var_1918_pad_0, pad_type = var_1918_pad_type_0, strides = var_1918_strides_0, weight = layers_9_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_73_cast_fp16)[name = string("op_1918_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> obj_39_cast_fp16 = add(x = var_1912_cast_fp16, y = var_1918_cast_fp16)[name = string("obj_39_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> inputs_39_cast_fp16 = add(x = inputs_37_cast_fp16, y = obj_39_cast_fp16)[name = string("inputs_39_cast_fp16")];
+            tensor<int32, [1]> out_39_axes_0 = const()[name = string("out_39_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_1929_to_fp16 = const()[name = string("op_1929_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1500]> out_39_cast_fp16 = layer_norm(axes = out_39_axes_0, epsilon = var_1929_to_fp16, x = inputs_39_cast_fp16)[name = string("out_39_cast_fp16")];
+            tensor<fp16, [768]> input_75_gamma_0_to_fp16 = const()[name = string("input_75_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(49990720)))];
+            tensor<fp16, [768]> input_75_beta_0_to_fp16 = const()[name = string("input_75_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(49992320)))];
+            fp16 input_75_epsilon_0_to_fp16 = const()[name = string("input_75_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1500]> input_75_cast_fp16 = batch_norm(beta = input_75_beta_0_to_fp16, epsilon = input_75_epsilon_0_to_fp16, gamma = input_75_gamma_0_to_fp16, mean = var_81_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_39_cast_fp16)[name = string("input_75_cast_fp16")];
+            string var_1947_pad_type_0 = const()[name = string("op_1947_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1947_strides_0 = const()[name = string("op_1947_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1947_pad_0 = const()[name = string("op_1947_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1947_dilations_0 = const()[name = string("op_1947_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1947_groups_0 = const()[name = string("op_1947_groups_0"), val = int32(1)];
+            tensor<fp16, [3072, 768, 1, 1]> layers_9_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [3072, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(49993920))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(51173632))))[name = string("layers_9_fc1_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [3072]> layers_9_fc1_inlier_module_bias_to_fp16 = const()[name = string("layers_9_fc1_inlier_module_bias_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(51173760)))];
+            tensor<fp16, [1, 3072, 1, 1500]> var_1947_cast_fp16 = conv(bias = layers_9_fc1_inlier_module_bias_to_fp16, dilations = var_1947_dilations_0, groups = var_1947_groups_0, pad = var_1947_pad_0, pad_type = var_1947_pad_type_0, strides = var_1947_strides_0, weight = layers_9_fc1_inlier_module_weight_to_fp16_palettized, x = input_75_cast_fp16)[name = string("op_1947_cast_fp16")];
+            string var_1953_pad_type_0 = const()[name = string("op_1953_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1953_strides_0 = const()[name = string("op_1953_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1953_pad_0 = const()[name = string("op_1953_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1953_dilations_0 = const()[name = string("op_1953_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1953_groups_0 = const()[name = string("op_1953_groups_0"), val = int32(1)];
+            tensor<fp16, [3072, 768, 1, 1]> layers_9_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [3072, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(51201984))), nonzero_data = tensor<fp16, [10959]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(51179968))))[name = string("layers_9_fc1_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 3072, 1, 1500]> var_1953_cast_fp16 = conv(dilations = var_1953_dilations_0, groups = var_1953_groups_0, pad = var_1953_pad_0, pad_type = var_1953_pad_type_0, strides = var_1953_strides_0, weight = layers_9_fc1_outlier_module_weight_to_fp16_sparsified, x = input_75_cast_fp16)[name = string("op_1953_cast_fp16")];
+            tensor<fp16, [1, 3072, 1, 1500]> input_77_cast_fp16 = add(x = var_1947_cast_fp16, y = var_1953_cast_fp16)[name = string("input_77_cast_fp16")];
+            string input_79_mode_0 = const()[name = string("input_79_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 3072, 1, 1500]> input_79_cast_fp16 = gelu(mode = input_79_mode_0, x = input_77_cast_fp16)[name = string("input_79_cast_fp16")];
+            string var_1964_pad_type_0 = const()[name = string("op_1964_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1964_strides_0 = const()[name = string("op_1964_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1964_pad_0 = const()[name = string("op_1964_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1964_dilations_0 = const()[name = string("op_1964_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1964_groups_0 = const()[name = string("op_1964_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 3072, 1, 1]> layers_9_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(51496960))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(52676672))))[name = string("layers_9_fc2_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_9_fc2_inlier_module_bias_to_fp16 = const()[name = string("layers_9_fc2_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(52676800)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_1964_cast_fp16 = conv(bias = layers_9_fc2_inlier_module_bias_to_fp16, dilations = var_1964_dilations_0, groups = var_1964_groups_0, pad = var_1964_pad_0, pad_type = var_1964_pad_type_0, strides = var_1964_strides_0, weight = layers_9_fc2_inlier_module_weight_to_fp16_palettized, x = input_79_cast_fp16)[name = string("op_1964_cast_fp16")];
+            string var_1970_pad_type_0 = const()[name = string("op_1970_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1970_strides_0 = const()[name = string("op_1970_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1970_pad_0 = const()[name = string("op_1970_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1970_dilations_0 = const()[name = string("op_1970_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1970_groups_0 = const()[name = string("op_1970_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 3072, 1, 1]> layers_9_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(52698816))), nonzero_data = tensor<fp16, [10150]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(52678400))))[name = string("layers_9_fc2_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1500]> var_1970_cast_fp16 = conv(dilations = var_1970_dilations_0, groups = var_1970_groups_0, pad = var_1970_pad_0, pad_type = var_1970_pad_type_0, strides = var_1970_strides_0, weight = layers_9_fc2_outlier_module_weight_to_fp16_sparsified, x = input_79_cast_fp16)[name = string("op_1970_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> hidden_states_23_cast_fp16 = add(x = var_1964_cast_fp16, y = var_1970_cast_fp16)[name = string("hidden_states_23_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> inputs_41_cast_fp16 = add(x = inputs_39_cast_fp16, y = hidden_states_23_cast_fp16)[name = string("inputs_41_cast_fp16")];
+            int32 var_1980 = const()[name = string("op_1980"), val = int32(3)];
+            tensor<int32, [1]> out_41_axes_0 = const()[name = string("out_41_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_1999_to_fp16 = const()[name = string("op_1999_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1500]> out_41_cast_fp16 = layer_norm(axes = out_41_axes_0, epsilon = var_1999_to_fp16, x = inputs_41_cast_fp16)[name = string("out_41_cast_fp16")];
+            tensor<fp16, [768]> obj_41_gamma_0_to_fp16 = const()[name = string("obj_41_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(52993792)))];
+            tensor<fp16, [768]> obj_41_beta_0_to_fp16 = const()[name = string("obj_41_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(52995392)))];
+            fp16 obj_41_epsilon_0_to_fp16 = const()[name = string("obj_41_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1500]> obj_41_cast_fp16 = batch_norm(beta = obj_41_beta_0_to_fp16, epsilon = obj_41_epsilon_0_to_fp16, gamma = obj_41_gamma_0_to_fp16, mean = var_81_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_41_cast_fp16)[name = string("obj_41_cast_fp16")];
+            string var_2021_pad_type_0 = const()[name = string("op_2021_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2021_strides_0 = const()[name = string("op_2021_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2021_pad_0 = const()[name = string("op_2021_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2021_dilations_0 = const()[name = string("op_2021_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2021_groups_0 = const()[name = string("op_2021_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_10_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(52996992))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(53291968))))[name = string("layers_10_self_attn_q_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_10_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_10_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(53292096)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_2021_cast_fp16 = conv(bias = layers_10_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_2021_dilations_0, groups = var_2021_groups_0, pad = var_2021_pad_0, pad_type = var_2021_pad_type_0, strides = var_2021_strides_0, weight = layers_10_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_41_cast_fp16)[name = string("op_2021_cast_fp16")];
+            string var_2027_pad_type_0 = const()[name = string("op_2027_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2027_strides_0 = const()[name = string("op_2027_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2027_pad_0 = const()[name = string("op_2027_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2027_dilations_0 = const()[name = string("op_2027_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2027_groups_0 = const()[name = string("op_2027_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_10_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(53299776))), nonzero_data = tensor<fp16, [2995]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(53293696))))[name = string("layers_10_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1500]> var_2027_cast_fp16 = conv(dilations = var_2027_dilations_0, groups = var_2027_groups_0, pad = var_2027_pad_0, pad_type = var_2027_pad_type_0, strides = var_2027_strides_0, weight = layers_10_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_41_cast_fp16)[name = string("op_2027_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> query_21_cast_fp16 = add(x = var_2021_cast_fp16, y = var_2027_cast_fp16)[name = string("query_21_cast_fp16")];
+            string var_2036_pad_type_0 = const()[name = string("op_2036_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2036_strides_0 = const()[name = string("op_2036_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2036_pad_0 = const()[name = string("op_2036_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2036_dilations_0 = const()[name = string("op_2036_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2036_groups_0 = const()[name = string("op_2036_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_10_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(53373568))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(53668544))))[name = string("layers_10_self_attn_k_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1, 768, 1, 1500]> var_2036_cast_fp16 = conv(dilations = var_2036_dilations_0, groups = var_2036_groups_0, pad = var_2036_pad_0, pad_type = var_2036_pad_type_0, strides = var_2036_strides_0, weight = layers_10_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_41_cast_fp16)[name = string("op_2036_cast_fp16")];
+            string var_2042_pad_type_0 = const()[name = string("op_2042_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2042_strides_0 = const()[name = string("op_2042_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2042_pad_0 = const()[name = string("op_2042_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2042_dilations_0 = const()[name = string("op_2042_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2042_groups_0 = const()[name = string("op_2042_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_10_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(53675072))), nonzero_data = tensor<fp16, [3150]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(53668672))))[name = string("layers_10_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1500]> var_2042_cast_fp16 = conv(dilations = var_2042_dilations_0, groups = var_2042_groups_0, pad = var_2042_pad_0, pad_type = var_2042_pad_type_0, strides = var_2042_strides_0, weight = layers_10_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_41_cast_fp16)[name = string("op_2042_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> key_21_cast_fp16 = add(x = var_2036_cast_fp16, y = var_2042_cast_fp16)[name = string("key_21_cast_fp16")];
+            string var_2052_pad_type_0 = const()[name = string("op_2052_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2052_strides_0 = const()[name = string("op_2052_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2052_pad_0 = const()[name = string("op_2052_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2052_dilations_0 = const()[name = string("op_2052_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2052_groups_0 = const()[name = string("op_2052_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_10_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(53748864))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(54043840))))[name = string("layers_10_self_attn_v_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_10_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_10_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(54043968)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_2052_cast_fp16 = conv(bias = layers_10_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_2052_dilations_0, groups = var_2052_groups_0, pad = var_2052_pad_0, pad_type = var_2052_pad_type_0, strides = var_2052_strides_0, weight = layers_10_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_41_cast_fp16)[name = string("op_2052_cast_fp16")];
+            string var_2058_pad_type_0 = const()[name = string("op_2058_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2058_strides_0 = const()[name = string("op_2058_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2058_pad_0 = const()[name = string("op_2058_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2058_dilations_0 = const()[name = string("op_2058_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2058_groups_0 = const()[name = string("op_2058_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_10_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(54050496))), nonzero_data = tensor<fp16, [2430]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(54045568))))[name = string("layers_10_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1500]> var_2058_cast_fp16 = conv(dilations = var_2058_dilations_0, groups = var_2058_groups_0, pad = var_2058_pad_0, pad_type = var_2058_pad_type_0, strides = var_2058_strides_0, weight = layers_10_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_41_cast_fp16)[name = string("op_2058_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> value_21_cast_fp16 = add(x = var_2052_cast_fp16, y = var_2058_cast_fp16)[name = string("value_21_cast_fp16")];
+            tensor<int32, [4]> var_2061 = const()[name = string("op_2061"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1500]> mh_q_21_cast_fp16 = reshape(shape = var_2061, x = query_21_cast_fp16)[name = string("mh_q_21_cast_fp16")];
+            fp16 var_2063_to_fp16 = const()[name = string("op_2063_to_fp16"), val = fp16(0x1p-3)];
+            tensor<fp16, [1, 12, 64, 1500]> var_2064_cast_fp16 = mul(x = mh_q_21_cast_fp16, y = var_2063_to_fp16)[name = string("op_2064_cast_fp16")];
+            tensor<int32, [4]> var_2065 = const()[name = string("op_2065"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1500]> var_2066_cast_fp16 = reshape(shape = var_2065, x = key_21_cast_fp16)[name = string("op_2066_cast_fp16")];
+            bool mh_w_21_transpose_x_0 = const()[name = string("mh_w_21_transpose_x_0"), val = bool(true)];
+            bool mh_w_21_transpose_y_0 = const()[name = string("mh_w_21_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 12, 1500, 1500]> mh_w_21_cast_fp16 = matmul(transpose_x = mh_w_21_transpose_x_0, transpose_y = mh_w_21_transpose_y_0, x = var_2064_cast_fp16, y = var_2066_cast_fp16)[name = string("mh_w_21_cast_fp16")];
+            tensor<fp16, [1, 12, 1500, 1500]> var_2069_cast_fp16 = softmax(axis = var_1980, x = mh_w_21_cast_fp16)[name = string("op_2069_cast_fp16")];
+            tensor<int32, [4]> var_2070 = const()[name = string("op_2070"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1500]> var_2071_cast_fp16 = reshape(shape = var_2070, x = value_21_cast_fp16)[name = string("op_2071_cast_fp16")];
+            bool attn_21_transpose_x_0 = const()[name = string("attn_21_transpose_x_0"), val = bool(false)];
+            bool attn_21_transpose_y_0 = const()[name = string("attn_21_transpose_y_0"), val = bool(true)];
+            tensor<fp16, [1, 12, 64, 1500]> attn_21_cast_fp16 = matmul(transpose_x = attn_21_transpose_x_0, transpose_y = attn_21_transpose_y_0, x = var_2071_cast_fp16, y = var_2069_cast_fp16)[name = string("attn_21_cast_fp16")];
+            tensor<int32, [4]> var_2074 = const()[name = string("op_2074"), val = tensor<int32, [4]>([1, 768, 1, -1])];
+            tensor<fp16, [1, 768, 1, 1500]> input_81_cast_fp16 = reshape(shape = var_2074, x = attn_21_cast_fp16)[name = string("input_81_cast_fp16")];
+            string var_2084_pad_type_0 = const()[name = string("op_2084_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2084_strides_0 = const()[name = string("op_2084_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2084_pad_0 = const()[name = string("op_2084_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2084_dilations_0 = const()[name = string("op_2084_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2084_groups_0 = const()[name = string("op_2084_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_10_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(54124288))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(54419264))))[name = string("layers_10_self_attn_o_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_10_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_10_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(54419392)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_2084_cast_fp16 = conv(bias = layers_10_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_2084_dilations_0, groups = var_2084_groups_0, pad = var_2084_pad_0, pad_type = var_2084_pad_type_0, strides = var_2084_strides_0, weight = layers_10_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_81_cast_fp16)[name = string("op_2084_cast_fp16")];
+            string var_2090_pad_type_0 = const()[name = string("op_2090_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2090_strides_0 = const()[name = string("op_2090_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2090_pad_0 = const()[name = string("op_2090_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2090_dilations_0 = const()[name = string("op_2090_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2090_groups_0 = const()[name = string("op_2090_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_10_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(54426624))), nonzero_data = tensor<fp16, [2772]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(54420992))))[name = string("layers_10_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1500]> var_2090_cast_fp16 = conv(dilations = var_2090_dilations_0, groups = var_2090_groups_0, pad = var_2090_pad_0, pad_type = var_2090_pad_type_0, strides = var_2090_strides_0, weight = layers_10_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_81_cast_fp16)[name = string("op_2090_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> obj_43_cast_fp16 = add(x = var_2084_cast_fp16, y = var_2090_cast_fp16)[name = string("obj_43_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> inputs_43_cast_fp16 = add(x = inputs_41_cast_fp16, y = obj_43_cast_fp16)[name = string("inputs_43_cast_fp16")];
+            tensor<int32, [1]> out_43_axes_0 = const()[name = string("out_43_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_2101_to_fp16 = const()[name = string("op_2101_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1500]> out_43_cast_fp16 = layer_norm(axes = out_43_axes_0, epsilon = var_2101_to_fp16, x = inputs_43_cast_fp16)[name = string("out_43_cast_fp16")];
+            tensor<fp16, [768]> input_83_gamma_0_to_fp16 = const()[name = string("input_83_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(54500416)))];
+            tensor<fp16, [768]> input_83_beta_0_to_fp16 = const()[name = string("input_83_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(54502016)))];
+            fp16 input_83_epsilon_0_to_fp16 = const()[name = string("input_83_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1500]> input_83_cast_fp16 = batch_norm(beta = input_83_beta_0_to_fp16, epsilon = input_83_epsilon_0_to_fp16, gamma = input_83_gamma_0_to_fp16, mean = var_81_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_43_cast_fp16)[name = string("input_83_cast_fp16")];
+            string var_2119_pad_type_0 = const()[name = string("op_2119_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2119_strides_0 = const()[name = string("op_2119_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2119_pad_0 = const()[name = string("op_2119_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2119_dilations_0 = const()[name = string("op_2119_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2119_groups_0 = const()[name = string("op_2119_groups_0"), val = int32(1)];
+            tensor<fp16, [3072, 768, 1, 1]> layers_10_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [3072, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(54503616))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(55683328))))[name = string("layers_10_fc1_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [3072]> layers_10_fc1_inlier_module_bias_to_fp16 = const()[name = string("layers_10_fc1_inlier_module_bias_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(55683456)))];
+            tensor<fp16, [1, 3072, 1, 1500]> var_2119_cast_fp16 = conv(bias = layers_10_fc1_inlier_module_bias_to_fp16, dilations = var_2119_dilations_0, groups = var_2119_groups_0, pad = var_2119_pad_0, pad_type = var_2119_pad_type_0, strides = var_2119_strides_0, weight = layers_10_fc1_inlier_module_weight_to_fp16_palettized, x = input_83_cast_fp16)[name = string("op_2119_cast_fp16")];
+            string var_2125_pad_type_0 = const()[name = string("op_2125_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2125_strides_0 = const()[name = string("op_2125_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2125_pad_0 = const()[name = string("op_2125_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2125_dilations_0 = const()[name = string("op_2125_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2125_groups_0 = const()[name = string("op_2125_groups_0"), val = int32(1)];
+            tensor<fp16, [3072, 768, 1, 1]> layers_10_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [3072, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(55711488))), nonzero_data = tensor<fp16, [10858]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(55689664))))[name = string("layers_10_fc1_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 3072, 1, 1500]> var_2125_cast_fp16 = conv(dilations = var_2125_dilations_0, groups = var_2125_groups_0, pad = var_2125_pad_0, pad_type = var_2125_pad_type_0, strides = var_2125_strides_0, weight = layers_10_fc1_outlier_module_weight_to_fp16_sparsified, x = input_83_cast_fp16)[name = string("op_2125_cast_fp16")];
+            tensor<fp16, [1, 3072, 1, 1500]> input_85_cast_fp16 = add(x = var_2119_cast_fp16, y = var_2125_cast_fp16)[name = string("input_85_cast_fp16")];
+            string input_87_mode_0 = const()[name = string("input_87_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 3072, 1, 1500]> input_87_cast_fp16 = gelu(mode = input_87_mode_0, x = input_85_cast_fp16)[name = string("input_87_cast_fp16")];
+            string var_2136_pad_type_0 = const()[name = string("op_2136_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2136_strides_0 = const()[name = string("op_2136_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2136_pad_0 = const()[name = string("op_2136_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2136_dilations_0 = const()[name = string("op_2136_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2136_groups_0 = const()[name = string("op_2136_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 3072, 1, 1]> layers_10_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(56006464))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(57186176))))[name = string("layers_10_fc2_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_10_fc2_inlier_module_bias_to_fp16 = const()[name = string("layers_10_fc2_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(57186304)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_2136_cast_fp16 = conv(bias = layers_10_fc2_inlier_module_bias_to_fp16, dilations = var_2136_dilations_0, groups = var_2136_groups_0, pad = var_2136_pad_0, pad_type = var_2136_pad_type_0, strides = var_2136_strides_0, weight = layers_10_fc2_inlier_module_weight_to_fp16_palettized, x = input_87_cast_fp16)[name = string("op_2136_cast_fp16")];
+            string var_2142_pad_type_0 = const()[name = string("op_2142_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2142_strides_0 = const()[name = string("op_2142_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2142_pad_0 = const()[name = string("op_2142_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2142_dilations_0 = const()[name = string("op_2142_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2142_groups_0 = const()[name = string("op_2142_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 3072, 1, 1]> layers_10_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(57213952))), nonzero_data = tensor<fp16, [12990]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(57187904))))[name = string("layers_10_fc2_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1500]> var_2142_cast_fp16 = conv(dilations = var_2142_dilations_0, groups = var_2142_groups_0, pad = var_2142_pad_0, pad_type = var_2142_pad_type_0, strides = var_2142_strides_0, weight = layers_10_fc2_outlier_module_weight_to_fp16_sparsified, x = input_87_cast_fp16)[name = string("op_2142_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> hidden_states_25_cast_fp16 = add(x = var_2136_cast_fp16, y = var_2142_cast_fp16)[name = string("hidden_states_25_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> inputs_45_cast_fp16 = add(x = inputs_43_cast_fp16, y = hidden_states_25_cast_fp16)[name = string("inputs_45_cast_fp16")];
+            int32 var_2152 = const()[name = string("op_2152"), val = int32(3)];
+            tensor<int32, [1]> out_45_axes_0 = const()[name = string("out_45_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_2171_to_fp16 = const()[name = string("op_2171_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1500]> out_45_cast_fp16 = layer_norm(axes = out_45_axes_0, epsilon = var_2171_to_fp16, x = inputs_45_cast_fp16)[name = string("out_45_cast_fp16")];
+            tensor<fp16, [768]> obj_45_gamma_0_to_fp16 = const()[name = string("obj_45_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(57508928)))];
+            tensor<fp16, [768]> obj_45_beta_0_to_fp16 = const()[name = string("obj_45_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(57510528)))];
+            fp16 obj_45_epsilon_0_to_fp16 = const()[name = string("obj_45_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1500]> obj_45_cast_fp16 = batch_norm(beta = obj_45_beta_0_to_fp16, epsilon = obj_45_epsilon_0_to_fp16, gamma = obj_45_gamma_0_to_fp16, mean = var_81_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_45_cast_fp16)[name = string("obj_45_cast_fp16")];
+            string var_2193_pad_type_0 = const()[name = string("op_2193_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2193_strides_0 = const()[name = string("op_2193_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2193_pad_0 = const()[name = string("op_2193_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2193_dilations_0 = const()[name = string("op_2193_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2193_groups_0 = const()[name = string("op_2193_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_11_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(57512128))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(57807104))))[name = string("layers_11_self_attn_q_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_11_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_11_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(57807232)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_2193_cast_fp16 = conv(bias = layers_11_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_2193_dilations_0, groups = var_2193_groups_0, pad = var_2193_pad_0, pad_type = var_2193_pad_type_0, strides = var_2193_strides_0, weight = layers_11_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_45_cast_fp16)[name = string("op_2193_cast_fp16")];
+            string var_2199_pad_type_0 = const()[name = string("op_2199_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2199_strides_0 = const()[name = string("op_2199_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2199_pad_0 = const()[name = string("op_2199_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2199_dilations_0 = const()[name = string("op_2199_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2199_groups_0 = const()[name = string("op_2199_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_11_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(57815296))), nonzero_data = tensor<fp16, [3182]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(57808832))))[name = string("layers_11_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1500]> var_2199_cast_fp16 = conv(dilations = var_2199_dilations_0, groups = var_2199_groups_0, pad = var_2199_pad_0, pad_type = var_2199_pad_type_0, strides = var_2199_strides_0, weight = layers_11_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_45_cast_fp16)[name = string("op_2199_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> query_cast_fp16 = add(x = var_2193_cast_fp16, y = var_2199_cast_fp16)[name = string("query_cast_fp16")];
+            string var_2208_pad_type_0 = const()[name = string("op_2208_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2208_strides_0 = const()[name = string("op_2208_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2208_pad_0 = const()[name = string("op_2208_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2208_dilations_0 = const()[name = string("op_2208_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2208_groups_0 = const()[name = string("op_2208_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_11_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(57889088))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(58184064))))[name = string("layers_11_self_attn_k_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1, 768, 1, 1500]> var_2208_cast_fp16 = conv(dilations = var_2208_dilations_0, groups = var_2208_groups_0, pad = var_2208_pad_0, pad_type = var_2208_pad_type_0, strides = var_2208_strides_0, weight = layers_11_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_45_cast_fp16)[name = string("op_2208_cast_fp16")];
+            string var_2214_pad_type_0 = const()[name = string("op_2214_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2214_strides_0 = const()[name = string("op_2214_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2214_pad_0 = const()[name = string("op_2214_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2214_dilations_0 = const()[name = string("op_2214_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2214_groups_0 = const()[name = string("op_2214_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_11_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(58191872))), nonzero_data = tensor<fp16, [3796]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(58184192))))[name = string("layers_11_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1500]> var_2214_cast_fp16 = conv(dilations = var_2214_dilations_0, groups = var_2214_groups_0, pad = var_2214_pad_0, pad_type = var_2214_pad_type_0, strides = var_2214_strides_0, weight = layers_11_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_45_cast_fp16)[name = string("op_2214_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> key_cast_fp16 = add(x = var_2208_cast_fp16, y = var_2214_cast_fp16)[name = string("key_cast_fp16")];
+            string var_2224_pad_type_0 = const()[name = string("op_2224_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2224_strides_0 = const()[name = string("op_2224_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2224_pad_0 = const()[name = string("op_2224_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2224_dilations_0 = const()[name = string("op_2224_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2224_groups_0 = const()[name = string("op_2224_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_11_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(58265664))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(58560640))))[name = string("layers_11_self_attn_v_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_11_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_11_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(58560768)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_2224_cast_fp16 = conv(bias = layers_11_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_2224_dilations_0, groups = var_2224_groups_0, pad = var_2224_pad_0, pad_type = var_2224_pad_type_0, strides = var_2224_strides_0, weight = layers_11_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_45_cast_fp16)[name = string("op_2224_cast_fp16")];
+            string var_2230_pad_type_0 = const()[name = string("op_2230_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2230_strides_0 = const()[name = string("op_2230_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2230_pad_0 = const()[name = string("op_2230_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2230_dilations_0 = const()[name = string("op_2230_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2230_groups_0 = const()[name = string("op_2230_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_11_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(58567232))), nonzero_data = tensor<fp16, [2394]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(58562368))))[name = string("layers_11_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1500]> var_2230_cast_fp16 = conv(dilations = var_2230_dilations_0, groups = var_2230_groups_0, pad = var_2230_pad_0, pad_type = var_2230_pad_type_0, strides = var_2230_strides_0, weight = layers_11_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_45_cast_fp16)[name = string("op_2230_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> value_cast_fp16 = add(x = var_2224_cast_fp16, y = var_2230_cast_fp16)[name = string("value_cast_fp16")];
+            tensor<int32, [4]> var_2233 = const()[name = string("op_2233"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1500]> mh_q_cast_fp16 = reshape(shape = var_2233, x = query_cast_fp16)[name = string("mh_q_cast_fp16")];
+            fp16 var_2235_to_fp16 = const()[name = string("op_2235_to_fp16"), val = fp16(0x1p-3)];
+            tensor<fp16, [1, 12, 64, 1500]> var_2236_cast_fp16 = mul(x = mh_q_cast_fp16, y = var_2235_to_fp16)[name = string("op_2236_cast_fp16")];
+            tensor<int32, [4]> var_2237 = const()[name = string("op_2237"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1500]> var_2238_cast_fp16 = reshape(shape = var_2237, x = key_cast_fp16)[name = string("op_2238_cast_fp16")];
+            bool mh_w_transpose_x_0 = const()[name = string("mh_w_transpose_x_0"), val = bool(true)];
+            bool mh_w_transpose_y_0 = const()[name = string("mh_w_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 12, 1500, 1500]> mh_w_cast_fp16 = matmul(transpose_x = mh_w_transpose_x_0, transpose_y = mh_w_transpose_y_0, x = var_2236_cast_fp16, y = var_2238_cast_fp16)[name = string("mh_w_cast_fp16")];
+            tensor<fp16, [1, 12, 1500, 1500]> var_2241_cast_fp16 = softmax(axis = var_2152, x = mh_w_cast_fp16)[name = string("op_2241_cast_fp16")];
+            tensor<int32, [4]> var_2242 = const()[name = string("op_2242"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1500]> var_2243_cast_fp16 = reshape(shape = var_2242, x = value_cast_fp16)[name = string("op_2243_cast_fp16")];
+            bool attn_transpose_x_0 = const()[name = string("attn_transpose_x_0"), val = bool(false)];
+            bool attn_transpose_y_0 = const()[name = string("attn_transpose_y_0"), val = bool(true)];
+            tensor<fp16, [1, 12, 64, 1500]> attn_cast_fp16 = matmul(transpose_x = attn_transpose_x_0, transpose_y = attn_transpose_y_0, x = var_2243_cast_fp16, y = var_2241_cast_fp16)[name = string("attn_cast_fp16")];
+            tensor<int32, [4]> var_2246 = const()[name = string("op_2246"), val = tensor<int32, [4]>([1, 768, 1, -1])];
+            tensor<fp16, [1, 768, 1, 1500]> input_89_cast_fp16 = reshape(shape = var_2246, x = attn_cast_fp16)[name = string("input_89_cast_fp16")];
+            string var_2256_pad_type_0 = const()[name = string("op_2256_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2256_strides_0 = const()[name = string("op_2256_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2256_pad_0 = const()[name = string("op_2256_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2256_dilations_0 = const()[name = string("op_2256_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2256_groups_0 = const()[name = string("op_2256_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_11_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(58641024))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(58936000))))[name = string("layers_11_self_attn_o_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_11_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_11_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(58936128)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_2256_cast_fp16 = conv(bias = layers_11_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_2256_dilations_0, groups = var_2256_groups_0, pad = var_2256_pad_0, pad_type = var_2256_pad_type_0, strides = var_2256_strides_0, weight = layers_11_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_89_cast_fp16)[name = string("op_2256_cast_fp16")];
+            string var_2262_pad_type_0 = const()[name = string("op_2262_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2262_strides_0 = const()[name = string("op_2262_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2262_pad_0 = const()[name = string("op_2262_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2262_dilations_0 = const()[name = string("op_2262_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2262_groups_0 = const()[name = string("op_2262_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_11_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(58943552))), nonzero_data = tensor<fp16, [2873]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(58937728))))[name = string("layers_11_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1500]> var_2262_cast_fp16 = conv(dilations = var_2262_dilations_0, groups = var_2262_groups_0, pad = var_2262_pad_0, pad_type = var_2262_pad_type_0, strides = var_2262_strides_0, weight = layers_11_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_89_cast_fp16)[name = string("op_2262_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> obj_cast_fp16 = add(x = var_2256_cast_fp16, y = var_2262_cast_fp16)[name = string("obj_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> inputs_47_cast_fp16 = add(x = inputs_45_cast_fp16, y = obj_cast_fp16)[name = string("inputs_47_cast_fp16")];
+            tensor<int32, [1]> out_47_axes_0 = const()[name = string("out_47_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_2273_to_fp16 = const()[name = string("op_2273_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1500]> out_47_cast_fp16 = layer_norm(axes = out_47_axes_0, epsilon = var_2273_to_fp16, x = inputs_47_cast_fp16)[name = string("out_47_cast_fp16")];
+            tensor<fp16, [768]> input_91_gamma_0_to_fp16 = const()[name = string("input_91_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(59017344)))];
+            tensor<fp16, [768]> input_91_beta_0_to_fp16 = const()[name = string("input_91_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(59018944)))];
+            fp16 input_91_epsilon_0_to_fp16 = const()[name = string("input_91_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1500]> input_91_cast_fp16 = batch_norm(beta = input_91_beta_0_to_fp16, epsilon = input_91_epsilon_0_to_fp16, gamma = input_91_gamma_0_to_fp16, mean = var_81_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_47_cast_fp16)[name = string("input_91_cast_fp16")];
+            string var_2291_pad_type_0 = const()[name = string("op_2291_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2291_strides_0 = const()[name = string("op_2291_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2291_pad_0 = const()[name = string("op_2291_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2291_dilations_0 = const()[name = string("op_2291_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2291_groups_0 = const()[name = string("op_2291_groups_0"), val = int32(1)];
+            tensor<fp16, [3072, 768, 1, 1]> layers_11_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [3072, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(59020544))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(60200256))))[name = string("layers_11_fc1_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [3072]> layers_11_fc1_inlier_module_bias_to_fp16 = const()[name = string("layers_11_fc1_inlier_module_bias_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(60200384)))];
+            tensor<fp16, [1, 3072, 1, 1500]> var_2291_cast_fp16 = conv(bias = layers_11_fc1_inlier_module_bias_to_fp16, dilations = var_2291_dilations_0, groups = var_2291_groups_0, pad = var_2291_pad_0, pad_type = var_2291_pad_type_0, strides = var_2291_strides_0, weight = layers_11_fc1_inlier_module_weight_to_fp16_palettized, x = input_91_cast_fp16)[name = string("op_2291_cast_fp16")];
+            string var_2297_pad_type_0 = const()[name = string("op_2297_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2297_strides_0 = const()[name = string("op_2297_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2297_pad_0 = const()[name = string("op_2297_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2297_dilations_0 = const()[name = string("op_2297_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2297_groups_0 = const()[name = string("op_2297_groups_0"), val = int32(1)];
+            tensor<fp16, [3072, 768, 1, 1]> layers_11_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [3072, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(60238976))), nonzero_data = tensor<fp16, [16136]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(60206592))))[name = string("layers_11_fc1_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 3072, 1, 1500]> var_2297_cast_fp16 = conv(dilations = var_2297_dilations_0, groups = var_2297_groups_0, pad = var_2297_pad_0, pad_type = var_2297_pad_type_0, strides = var_2297_strides_0, weight = layers_11_fc1_outlier_module_weight_to_fp16_sparsified, x = input_91_cast_fp16)[name = string("op_2297_cast_fp16")];
+            tensor<fp16, [1, 3072, 1, 1500]> input_93_cast_fp16 = add(x = var_2291_cast_fp16, y = var_2297_cast_fp16)[name = string("input_93_cast_fp16")];
+            string input_95_mode_0 = const()[name = string("input_95_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 3072, 1, 1500]> input_95_cast_fp16 = gelu(mode = input_95_mode_0, x = input_93_cast_fp16)[name = string("input_95_cast_fp16")];
+            string var_2308_pad_type_0 = const()[name = string("op_2308_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2308_strides_0 = const()[name = string("op_2308_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2308_pad_0 = const()[name = string("op_2308_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2308_dilations_0 = const()[name = string("op_2308_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2308_groups_0 = const()[name = string("op_2308_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 3072, 1, 1]> layers_11_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(60533952))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(61713664))))[name = string("layers_11_fc2_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_11_fc2_inlier_module_bias_to_fp16 = const()[name = string("layers_11_fc2_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(61713792)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_2308_cast_fp16 = conv(bias = layers_11_fc2_inlier_module_bias_to_fp16, dilations = var_2308_dilations_0, groups = var_2308_groups_0, pad = var_2308_pad_0, pad_type = var_2308_pad_type_0, strides = var_2308_strides_0, weight = layers_11_fc2_inlier_module_weight_to_fp16_palettized, x = input_95_cast_fp16)[name = string("op_2308_cast_fp16")];
+            string var_2314_pad_type_0 = const()[name = string("op_2314_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2314_strides_0 = const()[name = string("op_2314_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2314_pad_0 = const()[name = string("op_2314_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2314_dilations_0 = const()[name = string("op_2314_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2314_groups_0 = const()[name = string("op_2314_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 3072, 1, 1]> layers_11_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(61759168))), nonzero_data = tensor<fp16, [21830]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(61715392))))[name = string("layers_11_fc2_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1500]> var_2314_cast_fp16 = conv(dilations = var_2314_dilations_0, groups = var_2314_groups_0, pad = var_2314_pad_0, pad_type = var_2314_pad_type_0, strides = var_2314_strides_0, weight = layers_11_fc2_outlier_module_weight_to_fp16_sparsified, x = input_95_cast_fp16)[name = string("op_2314_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> hidden_states_cast_fp16 = add(x = var_2308_cast_fp16, y = var_2314_cast_fp16)[name = string("hidden_states_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> inputs_cast_fp16 = add(x = inputs_47_cast_fp16, y = hidden_states_cast_fp16)[name = string("inputs_cast_fp16")];
+            tensor<int32, [1]> out_axes_0 = const()[name = string("out_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_2329_to_fp16 = const()[name = string("op_2329_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1500]> out_cast_fp16 = layer_norm(axes = out_axes_0, epsilon = var_2329_to_fp16, x = inputs_cast_fp16)[name = string("out_cast_fp16")];
+            tensor<fp16, [768]> encoder_output_embeds_type_fp32_gamma_0_to_fp16 = const()[name = string("encoder_output_embeds_type_fp32_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(62054144)))];
+            tensor<fp16, [768]> encoder_output_embeds_type_fp32_beta_0_to_fp16 = const()[name = string("encoder_output_embeds_type_fp32_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(62055744)))];
+            fp16 encoder_output_embeds_type_fp32_epsilon_0_to_fp16 = const()[name = string("encoder_output_embeds_type_fp32_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1500]> encoder_output_embeds = batch_norm(beta = encoder_output_embeds_type_fp32_beta_0_to_fp16, epsilon = encoder_output_embeds_type_fp32_epsilon_0_to_fp16, gamma = encoder_output_embeds_type_fp32_gamma_0_to_fp16, mean = var_81_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_cast_fp16)[name = string("encoder_output_embeds_type_fp32_cast_fp16")];
+            string var_2355_pad_type_0 = const()[name = string("op_2355_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2355_strides_0 = const()[name = string("op_2355_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2355_pad_0 = const()[name = string("op_2355_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2355_dilations_0 = const()[name = string("op_2355_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2355_groups_0 = const()[name = string("op_2355_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> decoder_kv_cache_prep_0_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(62057344))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(62352320))))[name = string("decoder_kv_cache_prep_0_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1, 768, 1, 1500]> var_2355_cast_fp16 = conv(dilations = var_2355_dilations_0, groups = var_2355_groups_0, pad = var_2355_pad_0, pad_type = var_2355_pad_type_0, strides = var_2355_strides_0, weight = decoder_kv_cache_prep_0_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = encoder_output_embeds)[name = string("op_2355_cast_fp16")];
+            string var_2361_pad_type_0 = const()[name = string("op_2361_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2361_strides_0 = const()[name = string("op_2361_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2361_pad_0 = const()[name = string("op_2361_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2361_dilations_0 = const()[name = string("op_2361_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2361_groups_0 = const()[name = string("op_2361_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> decoder_kv_cache_prep_0_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(62364032))), nonzero_data = tensor<fp16, [5750]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(62352448))))[name = string("decoder_kv_cache_prep_0_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1500]> var_2361_cast_fp16 = conv(dilations = var_2361_dilations_0, groups = var_2361_groups_0, pad = var_2361_pad_0, pad_type = var_2361_pad_type_0, strides = var_2361_strides_0, weight = decoder_kv_cache_prep_0_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = encoder_output_embeds)[name = string("op_2361_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> var_2362_cast_fp16 = add(x = var_2355_cast_fp16, y = var_2361_cast_fp16)[name = string("op_2362_cast_fp16")];
+            string var_2371_pad_type_0 = const()[name = string("op_2371_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2371_strides_0 = const()[name = string("op_2371_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2371_pad_0 = const()[name = string("op_2371_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2371_dilations_0 = const()[name = string("op_2371_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2371_groups_0 = const()[name = string("op_2371_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> decoder_kv_cache_prep_0_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(62437824))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(62732800))))[name = string("decoder_kv_cache_prep_0_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> decoder_kv_cache_prep_0_encoder_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("decoder_kv_cache_prep_0_encoder_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(62732928)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_2371_cast_fp16 = conv(bias = decoder_kv_cache_prep_0_encoder_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_2371_dilations_0, groups = var_2371_groups_0, pad = var_2371_pad_0, pad_type = var_2371_pad_type_0, strides = var_2371_strides_0, weight = decoder_kv_cache_prep_0_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = encoder_output_embeds)[name = string("op_2371_cast_fp16")];
+            string var_2377_pad_type_0 = const()[name = string("op_2377_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2377_strides_0 = const()[name = string("op_2377_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2377_pad_0 = const()[name = string("op_2377_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2377_dilations_0 = const()[name = string("op_2377_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2377_groups_0 = const()[name = string("op_2377_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> decoder_kv_cache_prep_0_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(62740032))), nonzero_data = tensor<fp16, [2713]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(62734528))))[name = string("decoder_kv_cache_prep_0_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1500]> var_2377_cast_fp16 = conv(dilations = var_2377_dilations_0, groups = var_2377_groups_0, pad = var_2377_pad_0, pad_type = var_2377_pad_type_0, strides = var_2377_strides_0, weight = decoder_kv_cache_prep_0_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = encoder_output_embeds)[name = string("op_2377_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> var_2378_cast_fp16 = add(x = var_2371_cast_fp16, y = var_2377_cast_fp16)[name = string("op_2378_cast_fp16")];
+            string var_2398_pad_type_0 = const()[name = string("op_2398_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2398_strides_0 = const()[name = string("op_2398_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2398_pad_0 = const()[name = string("op_2398_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2398_dilations_0 = const()[name = string("op_2398_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2398_groups_0 = const()[name = string("op_2398_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> decoder_kv_cache_prep_1_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(62813824))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(63108800))))[name = string("decoder_kv_cache_prep_1_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1, 768, 1, 1500]> var_2398_cast_fp16 = conv(dilations = var_2398_dilations_0, groups = var_2398_groups_0, pad = var_2398_pad_0, pad_type = var_2398_pad_type_0, strides = var_2398_strides_0, weight = decoder_kv_cache_prep_1_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = encoder_output_embeds)[name = string("op_2398_cast_fp16")];
+            string var_2404_pad_type_0 = const()[name = string("op_2404_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2404_strides_0 = const()[name = string("op_2404_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2404_pad_0 = const()[name = string("op_2404_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2404_dilations_0 = const()[name = string("op_2404_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2404_groups_0 = const()[name = string("op_2404_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> decoder_kv_cache_prep_1_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(63116352))), nonzero_data = tensor<fp16, [3657]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(63108928))))[name = string("decoder_kv_cache_prep_1_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1500]> var_2404_cast_fp16 = conv(dilations = var_2404_dilations_0, groups = var_2404_groups_0, pad = var_2404_pad_0, pad_type = var_2404_pad_type_0, strides = var_2404_strides_0, weight = decoder_kv_cache_prep_1_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = encoder_output_embeds)[name = string("op_2404_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> var_2405_cast_fp16 = add(x = var_2398_cast_fp16, y = var_2404_cast_fp16)[name = string("op_2405_cast_fp16")];
+            string var_2414_pad_type_0 = const()[name = string("op_2414_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2414_strides_0 = const()[name = string("op_2414_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2414_pad_0 = const()[name = string("op_2414_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2414_dilations_0 = const()[name = string("op_2414_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2414_groups_0 = const()[name = string("op_2414_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> decoder_kv_cache_prep_1_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(63190144))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(63485120))))[name = string("decoder_kv_cache_prep_1_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> decoder_kv_cache_prep_1_encoder_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("decoder_kv_cache_prep_1_encoder_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(63485248)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_2414_cast_fp16 = conv(bias = decoder_kv_cache_prep_1_encoder_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_2414_dilations_0, groups = var_2414_groups_0, pad = var_2414_pad_0, pad_type = var_2414_pad_type_0, strides = var_2414_strides_0, weight = decoder_kv_cache_prep_1_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = encoder_output_embeds)[name = string("op_2414_cast_fp16")];
+            string var_2420_pad_type_0 = const()[name = string("op_2420_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2420_strides_0 = const()[name = string("op_2420_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2420_pad_0 = const()[name = string("op_2420_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2420_dilations_0 = const()[name = string("op_2420_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2420_groups_0 = const()[name = string("op_2420_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> decoder_kv_cache_prep_1_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(63492928))), nonzero_data = tensor<fp16, [2999]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(63486848))))[name = string("decoder_kv_cache_prep_1_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1500]> var_2420_cast_fp16 = conv(dilations = var_2420_dilations_0, groups = var_2420_groups_0, pad = var_2420_pad_0, pad_type = var_2420_pad_type_0, strides = var_2420_strides_0, weight = decoder_kv_cache_prep_1_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = encoder_output_embeds)[name = string("op_2420_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> var_2421_cast_fp16 = add(x = var_2414_cast_fp16, y = var_2420_cast_fp16)[name = string("op_2421_cast_fp16")];
+            string var_2441_pad_type_0 = const()[name = string("op_2441_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2441_strides_0 = const()[name = string("op_2441_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2441_pad_0 = const()[name = string("op_2441_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2441_dilations_0 = const()[name = string("op_2441_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2441_groups_0 = const()[name = string("op_2441_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> decoder_kv_cache_prep_2_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(63566720))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(63861696))))[name = string("decoder_kv_cache_prep_2_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1, 768, 1, 1500]> var_2441_cast_fp16 = conv(dilations = var_2441_dilations_0, groups = var_2441_groups_0, pad = var_2441_pad_0, pad_type = var_2441_pad_type_0, strides = var_2441_strides_0, weight = decoder_kv_cache_prep_2_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = encoder_output_embeds)[name = string("op_2441_cast_fp16")];
+            string var_2447_pad_type_0 = const()[name = string("op_2447_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2447_strides_0 = const()[name = string("op_2447_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2447_pad_0 = const()[name = string("op_2447_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2447_dilations_0 = const()[name = string("op_2447_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2447_groups_0 = const()[name = string("op_2447_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> decoder_kv_cache_prep_2_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(63873728))), nonzero_data = tensor<fp16, [5903]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(63861824))))[name = string("decoder_kv_cache_prep_2_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1500]> var_2447_cast_fp16 = conv(dilations = var_2447_dilations_0, groups = var_2447_groups_0, pad = var_2447_pad_0, pad_type = var_2447_pad_type_0, strides = var_2447_strides_0, weight = decoder_kv_cache_prep_2_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = encoder_output_embeds)[name = string("op_2447_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> var_2448_cast_fp16 = add(x = var_2441_cast_fp16, y = var_2447_cast_fp16)[name = string("op_2448_cast_fp16")];
+            string var_2457_pad_type_0 = const()[name = string("op_2457_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2457_strides_0 = const()[name = string("op_2457_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2457_pad_0 = const()[name = string("op_2457_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2457_dilations_0 = const()[name = string("op_2457_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2457_groups_0 = const()[name = string("op_2457_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> decoder_kv_cache_prep_2_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(63947520))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64242496))))[name = string("decoder_kv_cache_prep_2_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> decoder_kv_cache_prep_2_encoder_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("decoder_kv_cache_prep_2_encoder_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64242624)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_2457_cast_fp16 = conv(bias = decoder_kv_cache_prep_2_encoder_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_2457_dilations_0, groups = var_2457_groups_0, pad = var_2457_pad_0, pad_type = var_2457_pad_type_0, strides = var_2457_strides_0, weight = decoder_kv_cache_prep_2_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = encoder_output_embeds)[name = string("op_2457_cast_fp16")];
+            string var_2463_pad_type_0 = const()[name = string("op_2463_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2463_strides_0 = const()[name = string("op_2463_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2463_pad_0 = const()[name = string("op_2463_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2463_dilations_0 = const()[name = string("op_2463_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2463_groups_0 = const()[name = string("op_2463_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> decoder_kv_cache_prep_2_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64254144))), nonzero_data = tensor<fp16, [4919]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64244224))))[name = string("decoder_kv_cache_prep_2_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1500]> var_2463_cast_fp16 = conv(dilations = var_2463_dilations_0, groups = var_2463_groups_0, pad = var_2463_pad_0, pad_type = var_2463_pad_type_0, strides = var_2463_strides_0, weight = decoder_kv_cache_prep_2_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = encoder_output_embeds)[name = string("op_2463_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> var_2464_cast_fp16 = add(x = var_2457_cast_fp16, y = var_2463_cast_fp16)[name = string("op_2464_cast_fp16")];
+            string var_2484_pad_type_0 = const()[name = string("op_2484_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2484_strides_0 = const()[name = string("op_2484_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2484_pad_0 = const()[name = string("op_2484_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2484_dilations_0 = const()[name = string("op_2484_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2484_groups_0 = const()[name = string("op_2484_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> decoder_kv_cache_prep_3_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64327936))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64622912))))[name = string("decoder_kv_cache_prep_3_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1, 768, 1, 1500]> var_2484_cast_fp16 = conv(dilations = var_2484_dilations_0, groups = var_2484_groups_0, pad = var_2484_pad_0, pad_type = var_2484_pad_type_0, strides = var_2484_strides_0, weight = decoder_kv_cache_prep_3_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = encoder_output_embeds)[name = string("op_2484_cast_fp16")];
+            string var_2490_pad_type_0 = const()[name = string("op_2490_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2490_strides_0 = const()[name = string("op_2490_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2490_pad_0 = const()[name = string("op_2490_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2490_dilations_0 = const()[name = string("op_2490_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2490_groups_0 = const()[name = string("op_2490_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> decoder_kv_cache_prep_3_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64634496))), nonzero_data = tensor<fp16, [5682]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64623040))))[name = string("decoder_kv_cache_prep_3_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1500]> var_2490_cast_fp16 = conv(dilations = var_2490_dilations_0, groups = var_2490_groups_0, pad = var_2490_pad_0, pad_type = var_2490_pad_type_0, strides = var_2490_strides_0, weight = decoder_kv_cache_prep_3_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = encoder_output_embeds)[name = string("op_2490_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> var_2491_cast_fp16 = add(x = var_2484_cast_fp16, y = var_2490_cast_fp16)[name = string("op_2491_cast_fp16")];
+            string var_2500_pad_type_0 = const()[name = string("op_2500_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2500_strides_0 = const()[name = string("op_2500_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2500_pad_0 = const()[name = string("op_2500_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2500_dilations_0 = const()[name = string("op_2500_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2500_groups_0 = const()[name = string("op_2500_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> decoder_kv_cache_prep_3_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64708288))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(65003264))))[name = string("decoder_kv_cache_prep_3_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> decoder_kv_cache_prep_3_encoder_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("decoder_kv_cache_prep_3_encoder_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(65003392)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_2500_cast_fp16 = conv(bias = decoder_kv_cache_prep_3_encoder_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_2500_dilations_0, groups = var_2500_groups_0, pad = var_2500_pad_0, pad_type = var_2500_pad_type_0, strides = var_2500_strides_0, weight = decoder_kv_cache_prep_3_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = encoder_output_embeds)[name = string("op_2500_cast_fp16")];
+            string var_2506_pad_type_0 = const()[name = string("op_2506_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2506_strides_0 = const()[name = string("op_2506_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2506_pad_0 = const()[name = string("op_2506_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2506_dilations_0 = const()[name = string("op_2506_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2506_groups_0 = const()[name = string("op_2506_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> decoder_kv_cache_prep_3_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(65015232))), nonzero_data = tensor<fp16, [5086]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(65004992))))[name = string("decoder_kv_cache_prep_3_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1500]> var_2506_cast_fp16 = conv(dilations = var_2506_dilations_0, groups = var_2506_groups_0, pad = var_2506_pad_0, pad_type = var_2506_pad_type_0, strides = var_2506_strides_0, weight = decoder_kv_cache_prep_3_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = encoder_output_embeds)[name = string("op_2506_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> var_2507_cast_fp16 = add(x = var_2500_cast_fp16, y = var_2506_cast_fp16)[name = string("op_2507_cast_fp16")];
+            string var_2527_pad_type_0 = const()[name = string("op_2527_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2527_strides_0 = const()[name = string("op_2527_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2527_pad_0 = const()[name = string("op_2527_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2527_dilations_0 = const()[name = string("op_2527_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2527_groups_0 = const()[name = string("op_2527_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> decoder_kv_cache_prep_4_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(65089024))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(65384000))))[name = string("decoder_kv_cache_prep_4_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1, 768, 1, 1500]> var_2527_cast_fp16 = conv(dilations = var_2527_dilations_0, groups = var_2527_groups_0, pad = var_2527_pad_0, pad_type = var_2527_pad_type_0, strides = var_2527_strides_0, weight = decoder_kv_cache_prep_4_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = encoder_output_embeds)[name = string("op_2527_cast_fp16")];
+            string var_2533_pad_type_0 = const()[name = string("op_2533_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2533_strides_0 = const()[name = string("op_2533_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2533_pad_0 = const()[name = string("op_2533_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2533_dilations_0 = const()[name = string("op_2533_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2533_groups_0 = const()[name = string("op_2533_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> decoder_kv_cache_prep_4_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(65393024))), nonzero_data = tensor<fp16, [4398]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(65384128))))[name = string("decoder_kv_cache_prep_4_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1500]> var_2533_cast_fp16 = conv(dilations = var_2533_dilations_0, groups = var_2533_groups_0, pad = var_2533_pad_0, pad_type = var_2533_pad_type_0, strides = var_2533_strides_0, weight = decoder_kv_cache_prep_4_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = encoder_output_embeds)[name = string("op_2533_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> var_2534_cast_fp16 = add(x = var_2527_cast_fp16, y = var_2533_cast_fp16)[name = string("op_2534_cast_fp16")];
+            string var_2543_pad_type_0 = const()[name = string("op_2543_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2543_strides_0 = const()[name = string("op_2543_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2543_pad_0 = const()[name = string("op_2543_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2543_dilations_0 = const()[name = string("op_2543_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2543_groups_0 = const()[name = string("op_2543_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> decoder_kv_cache_prep_4_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(65466816))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(65761792))))[name = string("decoder_kv_cache_prep_4_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> decoder_kv_cache_prep_4_encoder_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("decoder_kv_cache_prep_4_encoder_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(65761920)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_2543_cast_fp16 = conv(bias = decoder_kv_cache_prep_4_encoder_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_2543_dilations_0, groups = var_2543_groups_0, pad = var_2543_pad_0, pad_type = var_2543_pad_type_0, strides = var_2543_strides_0, weight = decoder_kv_cache_prep_4_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = encoder_output_embeds)[name = string("op_2543_cast_fp16")];
+            string var_2549_pad_type_0 = const()[name = string("op_2549_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2549_strides_0 = const()[name = string("op_2549_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2549_pad_0 = const()[name = string("op_2549_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2549_dilations_0 = const()[name = string("op_2549_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2549_groups_0 = const()[name = string("op_2549_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> decoder_kv_cache_prep_4_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(65772736))), nonzero_data = tensor<fp16, [4570]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(65763520))))[name = string("decoder_kv_cache_prep_4_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1500]> var_2549_cast_fp16 = conv(dilations = var_2549_dilations_0, groups = var_2549_groups_0, pad = var_2549_pad_0, pad_type = var_2549_pad_type_0, strides = var_2549_strides_0, weight = decoder_kv_cache_prep_4_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = encoder_output_embeds)[name = string("op_2549_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> var_2550_cast_fp16 = add(x = var_2543_cast_fp16, y = var_2549_cast_fp16)[name = string("op_2550_cast_fp16")];
+            string var_2570_pad_type_0 = const()[name = string("op_2570_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2570_strides_0 = const()[name = string("op_2570_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2570_pad_0 = const()[name = string("op_2570_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2570_dilations_0 = const()[name = string("op_2570_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2570_groups_0 = const()[name = string("op_2570_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> decoder_kv_cache_prep_5_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(65846528))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(66141504))))[name = string("decoder_kv_cache_prep_5_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1, 768, 1, 1500]> var_2570_cast_fp16 = conv(dilations = var_2570_dilations_0, groups = var_2570_groups_0, pad = var_2570_pad_0, pad_type = var_2570_pad_type_0, strides = var_2570_strides_0, weight = decoder_kv_cache_prep_5_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = encoder_output_embeds)[name = string("op_2570_cast_fp16")];
+            string var_2576_pad_type_0 = const()[name = string("op_2576_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2576_strides_0 = const()[name = string("op_2576_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2576_pad_0 = const()[name = string("op_2576_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2576_dilations_0 = const()[name = string("op_2576_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2576_groups_0 = const()[name = string("op_2576_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> decoder_kv_cache_prep_5_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(66149056))), nonzero_data = tensor<fp16, [3668]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(66141632))))[name = string("decoder_kv_cache_prep_5_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1500]> var_2576_cast_fp16 = conv(dilations = var_2576_dilations_0, groups = var_2576_groups_0, pad = var_2576_pad_0, pad_type = var_2576_pad_type_0, strides = var_2576_strides_0, weight = decoder_kv_cache_prep_5_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = encoder_output_embeds)[name = string("op_2576_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> var_2577_cast_fp16 = add(x = var_2570_cast_fp16, y = var_2576_cast_fp16)[name = string("op_2577_cast_fp16")];
+            string var_2586_pad_type_0 = const()[name = string("op_2586_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2586_strides_0 = const()[name = string("op_2586_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2586_pad_0 = const()[name = string("op_2586_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2586_dilations_0 = const()[name = string("op_2586_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2586_groups_0 = const()[name = string("op_2586_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> decoder_kv_cache_prep_5_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(66222848))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(66517824))))[name = string("decoder_kv_cache_prep_5_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> decoder_kv_cache_prep_5_encoder_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("decoder_kv_cache_prep_5_encoder_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(66517952)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_2586_cast_fp16 = conv(bias = decoder_kv_cache_prep_5_encoder_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_2586_dilations_0, groups = var_2586_groups_0, pad = var_2586_pad_0, pad_type = var_2586_pad_type_0, strides = var_2586_strides_0, weight = decoder_kv_cache_prep_5_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = encoder_output_embeds)[name = string("op_2586_cast_fp16")];
+            string var_2592_pad_type_0 = const()[name = string("op_2592_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2592_strides_0 = const()[name = string("op_2592_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2592_pad_0 = const()[name = string("op_2592_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2592_dilations_0 = const()[name = string("op_2592_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2592_groups_0 = const()[name = string("op_2592_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> decoder_kv_cache_prep_5_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(66525632))), nonzero_data = tensor<fp16, [2978]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(66519552))))[name = string("decoder_kv_cache_prep_5_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1500]> var_2592_cast_fp16 = conv(dilations = var_2592_dilations_0, groups = var_2592_groups_0, pad = var_2592_pad_0, pad_type = var_2592_pad_type_0, strides = var_2592_strides_0, weight = decoder_kv_cache_prep_5_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = encoder_output_embeds)[name = string("op_2592_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> var_2593_cast_fp16 = add(x = var_2586_cast_fp16, y = var_2592_cast_fp16)[name = string("op_2593_cast_fp16")];
+            string var_2613_pad_type_0 = const()[name = string("op_2613_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2613_strides_0 = const()[name = string("op_2613_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2613_pad_0 = const()[name = string("op_2613_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2613_dilations_0 = const()[name = string("op_2613_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2613_groups_0 = const()[name = string("op_2613_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> decoder_kv_cache_prep_6_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(66599424))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(66894400))))[name = string("decoder_kv_cache_prep_6_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1, 768, 1, 1500]> var_2613_cast_fp16 = conv(dilations = var_2613_dilations_0, groups = var_2613_groups_0, pad = var_2613_pad_0, pad_type = var_2613_pad_type_0, strides = var_2613_strides_0, weight = decoder_kv_cache_prep_6_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = encoder_output_embeds)[name = string("op_2613_cast_fp16")];
+            string var_2619_pad_type_0 = const()[name = string("op_2619_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2619_strides_0 = const()[name = string("op_2619_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2619_pad_0 = const()[name = string("op_2619_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2619_dilations_0 = const()[name = string("op_2619_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2619_groups_0 = const()[name = string("op_2619_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> decoder_kv_cache_prep_6_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(66900864))), nonzero_data = tensor<fp16, [3134]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(66894528))))[name = string("decoder_kv_cache_prep_6_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1500]> var_2619_cast_fp16 = conv(dilations = var_2619_dilations_0, groups = var_2619_groups_0, pad = var_2619_pad_0, pad_type = var_2619_pad_type_0, strides = var_2619_strides_0, weight = decoder_kv_cache_prep_6_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = encoder_output_embeds)[name = string("op_2619_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> var_2620_cast_fp16 = add(x = var_2613_cast_fp16, y = var_2619_cast_fp16)[name = string("op_2620_cast_fp16")];
+            string var_2629_pad_type_0 = const()[name = string("op_2629_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2629_strides_0 = const()[name = string("op_2629_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2629_pad_0 = const()[name = string("op_2629_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2629_dilations_0 = const()[name = string("op_2629_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2629_groups_0 = const()[name = string("op_2629_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> decoder_kv_cache_prep_6_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(66974656))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(67269632))))[name = string("decoder_kv_cache_prep_6_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> decoder_kv_cache_prep_6_encoder_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("decoder_kv_cache_prep_6_encoder_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(67269760)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_2629_cast_fp16 = conv(bias = decoder_kv_cache_prep_6_encoder_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_2629_dilations_0, groups = var_2629_groups_0, pad = var_2629_pad_0, pad_type = var_2629_pad_type_0, strides = var_2629_strides_0, weight = decoder_kv_cache_prep_6_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = encoder_output_embeds)[name = string("op_2629_cast_fp16")];
+            string var_2635_pad_type_0 = const()[name = string("op_2635_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2635_strides_0 = const()[name = string("op_2635_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2635_pad_0 = const()[name = string("op_2635_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2635_dilations_0 = const()[name = string("op_2635_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2635_groups_0 = const()[name = string("op_2635_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> decoder_kv_cache_prep_6_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(67279872))), nonzero_data = tensor<fp16, [4219]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(67271360))))[name = string("decoder_kv_cache_prep_6_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1500]> var_2635_cast_fp16 = conv(dilations = var_2635_dilations_0, groups = var_2635_groups_0, pad = var_2635_pad_0, pad_type = var_2635_pad_type_0, strides = var_2635_strides_0, weight = decoder_kv_cache_prep_6_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = encoder_output_embeds)[name = string("op_2635_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> var_2636_cast_fp16 = add(x = var_2629_cast_fp16, y = var_2635_cast_fp16)[name = string("op_2636_cast_fp16")];
+            string var_2656_pad_type_0 = const()[name = string("op_2656_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2656_strides_0 = const()[name = string("op_2656_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2656_pad_0 = const()[name = string("op_2656_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2656_dilations_0 = const()[name = string("op_2656_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2656_groups_0 = const()[name = string("op_2656_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> decoder_kv_cache_prep_7_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(67353664))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(67648640))))[name = string("decoder_kv_cache_prep_7_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1, 768, 1, 1500]> var_2656_cast_fp16 = conv(dilations = var_2656_dilations_0, groups = var_2656_groups_0, pad = var_2656_pad_0, pad_type = var_2656_pad_type_0, strides = var_2656_strides_0, weight = decoder_kv_cache_prep_7_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = encoder_output_embeds)[name = string("op_2656_cast_fp16")];
+            string var_2662_pad_type_0 = const()[name = string("op_2662_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2662_strides_0 = const()[name = string("op_2662_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2662_pad_0 = const()[name = string("op_2662_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2662_dilations_0 = const()[name = string("op_2662_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2662_groups_0 = const()[name = string("op_2662_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> decoder_kv_cache_prep_7_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(67656128))), nonzero_data = tensor<fp16, [3648]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(67648768))))[name = string("decoder_kv_cache_prep_7_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1500]> var_2662_cast_fp16 = conv(dilations = var_2662_dilations_0, groups = var_2662_groups_0, pad = var_2662_pad_0, pad_type = var_2662_pad_type_0, strides = var_2662_strides_0, weight = decoder_kv_cache_prep_7_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = encoder_output_embeds)[name = string("op_2662_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> var_2663_cast_fp16 = add(x = var_2656_cast_fp16, y = var_2662_cast_fp16)[name = string("op_2663_cast_fp16")];
+            string var_2672_pad_type_0 = const()[name = string("op_2672_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2672_strides_0 = const()[name = string("op_2672_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2672_pad_0 = const()[name = string("op_2672_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2672_dilations_0 = const()[name = string("op_2672_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2672_groups_0 = const()[name = string("op_2672_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> decoder_kv_cache_prep_7_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(67729920))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(68024896))))[name = string("decoder_kv_cache_prep_7_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> decoder_kv_cache_prep_7_encoder_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("decoder_kv_cache_prep_7_encoder_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(68025024)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_2672_cast_fp16 = conv(bias = decoder_kv_cache_prep_7_encoder_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_2672_dilations_0, groups = var_2672_groups_0, pad = var_2672_pad_0, pad_type = var_2672_pad_type_0, strides = var_2672_strides_0, weight = decoder_kv_cache_prep_7_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = encoder_output_embeds)[name = string("op_2672_cast_fp16")];
+            string var_2678_pad_type_0 = const()[name = string("op_2678_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2678_strides_0 = const()[name = string("op_2678_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2678_pad_0 = const()[name = string("op_2678_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2678_dilations_0 = const()[name = string("op_2678_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2678_groups_0 = const()[name = string("op_2678_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> decoder_kv_cache_prep_7_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(68033728))), nonzero_data = tensor<fp16, [3496]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(68026624))))[name = string("decoder_kv_cache_prep_7_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1500]> var_2678_cast_fp16 = conv(dilations = var_2678_dilations_0, groups = var_2678_groups_0, pad = var_2678_pad_0, pad_type = var_2678_pad_type_0, strides = var_2678_strides_0, weight = decoder_kv_cache_prep_7_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = encoder_output_embeds)[name = string("op_2678_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> var_2679_cast_fp16 = add(x = var_2672_cast_fp16, y = var_2678_cast_fp16)[name = string("op_2679_cast_fp16")];
+            string var_2699_pad_type_0 = const()[name = string("op_2699_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2699_strides_0 = const()[name = string("op_2699_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2699_pad_0 = const()[name = string("op_2699_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2699_dilations_0 = const()[name = string("op_2699_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2699_groups_0 = const()[name = string("op_2699_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> decoder_kv_cache_prep_8_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(68107520))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(68402496))))[name = string("decoder_kv_cache_prep_8_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1, 768, 1, 1500]> var_2699_cast_fp16 = conv(dilations = var_2699_dilations_0, groups = var_2699_groups_0, pad = var_2699_pad_0, pad_type = var_2699_pad_type_0, strides = var_2699_strides_0, weight = decoder_kv_cache_prep_8_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = encoder_output_embeds)[name = string("op_2699_cast_fp16")];
+            string var_2705_pad_type_0 = const()[name = string("op_2705_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2705_strides_0 = const()[name = string("op_2705_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2705_pad_0 = const()[name = string("op_2705_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2705_dilations_0 = const()[name = string("op_2705_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2705_groups_0 = const()[name = string("op_2705_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> decoder_kv_cache_prep_8_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(68409920))), nonzero_data = tensor<fp16, [3611]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(68402624))))[name = string("decoder_kv_cache_prep_8_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1500]> var_2705_cast_fp16 = conv(dilations = var_2705_dilations_0, groups = var_2705_groups_0, pad = var_2705_pad_0, pad_type = var_2705_pad_type_0, strides = var_2705_strides_0, weight = decoder_kv_cache_prep_8_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = encoder_output_embeds)[name = string("op_2705_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> var_2706_cast_fp16 = add(x = var_2699_cast_fp16, y = var_2705_cast_fp16)[name = string("op_2706_cast_fp16")];
+            string var_2715_pad_type_0 = const()[name = string("op_2715_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2715_strides_0 = const()[name = string("op_2715_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2715_pad_0 = const()[name = string("op_2715_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2715_dilations_0 = const()[name = string("op_2715_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2715_groups_0 = const()[name = string("op_2715_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> decoder_kv_cache_prep_8_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(68483712))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(68778688))))[name = string("decoder_kv_cache_prep_8_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> decoder_kv_cache_prep_8_encoder_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("decoder_kv_cache_prep_8_encoder_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(68778816)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_2715_cast_fp16 = conv(bias = decoder_kv_cache_prep_8_encoder_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_2715_dilations_0, groups = var_2715_groups_0, pad = var_2715_pad_0, pad_type = var_2715_pad_type_0, strides = var_2715_strides_0, weight = decoder_kv_cache_prep_8_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = encoder_output_embeds)[name = string("op_2715_cast_fp16")];
+            string var_2721_pad_type_0 = const()[name = string("op_2721_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2721_strides_0 = const()[name = string("op_2721_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2721_pad_0 = const()[name = string("op_2721_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2721_dilations_0 = const()[name = string("op_2721_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2721_groups_0 = const()[name = string("op_2721_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> decoder_kv_cache_prep_8_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(68785920))), nonzero_data = tensor<fp16, [2710]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(68780416))))[name = string("decoder_kv_cache_prep_8_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1500]> var_2721_cast_fp16 = conv(dilations = var_2721_dilations_0, groups = var_2721_groups_0, pad = var_2721_pad_0, pad_type = var_2721_pad_type_0, strides = var_2721_strides_0, weight = decoder_kv_cache_prep_8_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = encoder_output_embeds)[name = string("op_2721_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> var_2722_cast_fp16 = add(x = var_2715_cast_fp16, y = var_2721_cast_fp16)[name = string("op_2722_cast_fp16")];
+            string var_2742_pad_type_0 = const()[name = string("op_2742_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2742_strides_0 = const()[name = string("op_2742_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2742_pad_0 = const()[name = string("op_2742_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2742_dilations_0 = const()[name = string("op_2742_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2742_groups_0 = const()[name = string("op_2742_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> decoder_kv_cache_prep_9_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(68859712))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(69154688))))[name = string("decoder_kv_cache_prep_9_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1, 768, 1, 1500]> var_2742_cast_fp16 = conv(dilations = var_2742_dilations_0, groups = var_2742_groups_0, pad = var_2742_pad_0, pad_type = var_2742_pad_type_0, strides = var_2742_strides_0, weight = decoder_kv_cache_prep_9_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = encoder_output_embeds)[name = string("op_2742_cast_fp16")];
+            string var_2748_pad_type_0 = const()[name = string("op_2748_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2748_strides_0 = const()[name = string("op_2748_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2748_pad_0 = const()[name = string("op_2748_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2748_dilations_0 = const()[name = string("op_2748_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2748_groups_0 = const()[name = string("op_2748_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> decoder_kv_cache_prep_9_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(69161664))), nonzero_data = tensor<fp16, [3375]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(69154816))))[name = string("decoder_kv_cache_prep_9_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1500]> var_2748_cast_fp16 = conv(dilations = var_2748_dilations_0, groups = var_2748_groups_0, pad = var_2748_pad_0, pad_type = var_2748_pad_type_0, strides = var_2748_strides_0, weight = decoder_kv_cache_prep_9_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = encoder_output_embeds)[name = string("op_2748_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> var_2749_cast_fp16 = add(x = var_2742_cast_fp16, y = var_2748_cast_fp16)[name = string("op_2749_cast_fp16")];
+            string var_2758_pad_type_0 = const()[name = string("op_2758_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2758_strides_0 = const()[name = string("op_2758_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2758_pad_0 = const()[name = string("op_2758_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2758_dilations_0 = const()[name = string("op_2758_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2758_groups_0 = const()[name = string("op_2758_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> decoder_kv_cache_prep_9_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(69235456))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(69530432))))[name = string("decoder_kv_cache_prep_9_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> decoder_kv_cache_prep_9_encoder_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("decoder_kv_cache_prep_9_encoder_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(69530560)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_2758_cast_fp16 = conv(bias = decoder_kv_cache_prep_9_encoder_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_2758_dilations_0, groups = var_2758_groups_0, pad = var_2758_pad_0, pad_type = var_2758_pad_type_0, strides = var_2758_strides_0, weight = decoder_kv_cache_prep_9_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = encoder_output_embeds)[name = string("op_2758_cast_fp16")];
+            string var_2764_pad_type_0 = const()[name = string("op_2764_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2764_strides_0 = const()[name = string("op_2764_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2764_pad_0 = const()[name = string("op_2764_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2764_dilations_0 = const()[name = string("op_2764_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2764_groups_0 = const()[name = string("op_2764_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> decoder_kv_cache_prep_9_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(69537216))), nonzero_data = tensor<fp16, [2479]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(69532160))))[name = string("decoder_kv_cache_prep_9_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1500]> var_2764_cast_fp16 = conv(dilations = var_2764_dilations_0, groups = var_2764_groups_0, pad = var_2764_pad_0, pad_type = var_2764_pad_type_0, strides = var_2764_strides_0, weight = decoder_kv_cache_prep_9_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = encoder_output_embeds)[name = string("op_2764_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> var_2765_cast_fp16 = add(x = var_2758_cast_fp16, y = var_2764_cast_fp16)[name = string("op_2765_cast_fp16")];
+            string var_2785_pad_type_0 = const()[name = string("op_2785_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2785_strides_0 = const()[name = string("op_2785_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2785_pad_0 = const()[name = string("op_2785_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2785_dilations_0 = const()[name = string("op_2785_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2785_groups_0 = const()[name = string("op_2785_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> decoder_kv_cache_prep_10_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(69611008))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(69905984))))[name = string("decoder_kv_cache_prep_10_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1, 768, 1, 1500]> var_2785_cast_fp16 = conv(dilations = var_2785_dilations_0, groups = var_2785_groups_0, pad = var_2785_pad_0, pad_type = var_2785_pad_type_0, strides = var_2785_strides_0, weight = decoder_kv_cache_prep_10_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = encoder_output_embeds)[name = string("op_2785_cast_fp16")];
+            string var_2791_pad_type_0 = const()[name = string("op_2791_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2791_strides_0 = const()[name = string("op_2791_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2791_pad_0 = const()[name = string("op_2791_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2791_dilations_0 = const()[name = string("op_2791_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2791_groups_0 = const()[name = string("op_2791_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> decoder_kv_cache_prep_10_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(69913536))), nonzero_data = tensor<fp16, [3654]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(69906112))))[name = string("decoder_kv_cache_prep_10_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1500]> var_2791_cast_fp16 = conv(dilations = var_2791_dilations_0, groups = var_2791_groups_0, pad = var_2791_pad_0, pad_type = var_2791_pad_type_0, strides = var_2791_strides_0, weight = decoder_kv_cache_prep_10_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = encoder_output_embeds)[name = string("op_2791_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> var_2792_cast_fp16 = add(x = var_2785_cast_fp16, y = var_2791_cast_fp16)[name = string("op_2792_cast_fp16")];
+            string var_2801_pad_type_0 = const()[name = string("op_2801_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2801_strides_0 = const()[name = string("op_2801_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2801_pad_0 = const()[name = string("op_2801_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2801_dilations_0 = const()[name = string("op_2801_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2801_groups_0 = const()[name = string("op_2801_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> decoder_kv_cache_prep_10_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(69987328))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(70282304))))[name = string("decoder_kv_cache_prep_10_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> decoder_kv_cache_prep_10_encoder_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("decoder_kv_cache_prep_10_encoder_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(70282432)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_2801_cast_fp16 = conv(bias = decoder_kv_cache_prep_10_encoder_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_2801_dilations_0, groups = var_2801_groups_0, pad = var_2801_pad_0, pad_type = var_2801_pad_type_0, strides = var_2801_strides_0, weight = decoder_kv_cache_prep_10_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = encoder_output_embeds)[name = string("op_2801_cast_fp16")];
+            string var_2807_pad_type_0 = const()[name = string("op_2807_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2807_strides_0 = const()[name = string("op_2807_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2807_pad_0 = const()[name = string("op_2807_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2807_dilations_0 = const()[name = string("op_2807_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2807_groups_0 = const()[name = string("op_2807_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> decoder_kv_cache_prep_10_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(70289984))), nonzero_data = tensor<fp16, [2923]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(70284032))))[name = string("decoder_kv_cache_prep_10_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1500]> var_2807_cast_fp16 = conv(dilations = var_2807_dilations_0, groups = var_2807_groups_0, pad = var_2807_pad_0, pad_type = var_2807_pad_type_0, strides = var_2807_strides_0, weight = decoder_kv_cache_prep_10_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = encoder_output_embeds)[name = string("op_2807_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> var_2808_cast_fp16 = add(x = var_2801_cast_fp16, y = var_2807_cast_fp16)[name = string("op_2808_cast_fp16")];
+            string var_2828_pad_type_0 = const()[name = string("op_2828_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2828_strides_0 = const()[name = string("op_2828_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2828_pad_0 = const()[name = string("op_2828_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2828_dilations_0 = const()[name = string("op_2828_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2828_groups_0 = const()[name = string("op_2828_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> decoder_kv_cache_prep_11_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(70363776))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(70658752))))[name = string("decoder_kv_cache_prep_11_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1, 768, 1, 1500]> var_2828_cast_fp16 = conv(dilations = var_2828_dilations_0, groups = var_2828_groups_0, pad = var_2828_pad_0, pad_type = var_2828_pad_type_0, strides = var_2828_strides_0, weight = decoder_kv_cache_prep_11_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = encoder_output_embeds)[name = string("op_2828_cast_fp16")];
+            string var_2834_pad_type_0 = const()[name = string("op_2834_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2834_strides_0 = const()[name = string("op_2834_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2834_pad_0 = const()[name = string("op_2834_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2834_dilations_0 = const()[name = string("op_2834_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2834_groups_0 = const()[name = string("op_2834_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> decoder_kv_cache_prep_11_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(70665152))), nonzero_data = tensor<fp16, [3079]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(70658880))))[name = string("decoder_kv_cache_prep_11_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1500]> var_2834_cast_fp16 = conv(dilations = var_2834_dilations_0, groups = var_2834_groups_0, pad = var_2834_pad_0, pad_type = var_2834_pad_type_0, strides = var_2834_strides_0, weight = decoder_kv_cache_prep_11_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = encoder_output_embeds)[name = string("op_2834_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> k_cast_fp16 = add(x = var_2828_cast_fp16, y = var_2834_cast_fp16)[name = string("k_cast_fp16")];
+            string var_2844_pad_type_0 = const()[name = string("op_2844_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2844_strides_0 = const()[name = string("op_2844_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2844_pad_0 = const()[name = string("op_2844_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2844_dilations_0 = const()[name = string("op_2844_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2844_groups_0 = const()[name = string("op_2844_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> decoder_kv_cache_prep_11_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(70738944))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(71033920))))[name = string("decoder_kv_cache_prep_11_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> decoder_kv_cache_prep_11_encoder_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("decoder_kv_cache_prep_11_encoder_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(71034048)))];
+            tensor<fp16, [1, 768, 1, 1500]> var_2844_cast_fp16 = conv(bias = decoder_kv_cache_prep_11_encoder_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_2844_dilations_0, groups = var_2844_groups_0, pad = var_2844_pad_0, pad_type = var_2844_pad_type_0, strides = var_2844_strides_0, weight = decoder_kv_cache_prep_11_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = encoder_output_embeds)[name = string("op_2844_cast_fp16")];
+            string var_2850_pad_type_0 = const()[name = string("op_2850_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2850_strides_0 = const()[name = string("op_2850_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2850_pad_0 = const()[name = string("op_2850_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2850_dilations_0 = const()[name = string("op_2850_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2850_groups_0 = const()[name = string("op_2850_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> decoder_kv_cache_prep_11_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(71041984))), nonzero_data = tensor<fp16, [3136]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(71035648))))[name = string("decoder_kv_cache_prep_11_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1500]> var_2850_cast_fp16 = conv(dilations = var_2850_dilations_0, groups = var_2850_groups_0, pad = var_2850_pad_0, pad_type = var_2850_pad_type_0, strides = var_2850_strides_0, weight = decoder_kv_cache_prep_11_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = encoder_output_embeds)[name = string("op_2850_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1500]> v_cast_fp16 = add(x = var_2844_cast_fp16, y = var_2850_cast_fp16)[name = string("v_cast_fp16")];
+            int32 var_2856 = const()[name = string("op_2856"), val = int32(0)];
+            bool input_99_interleave_0 = const()[name = string("input_99_interleave_0"), val = bool(false)];
+            tensor<fp16, [12, 768, 1, 1500]> input_99_cast_fp16 = concat(axis = var_2856, interleave = input_99_interleave_0, values = (var_2362_cast_fp16, var_2405_cast_fp16, var_2448_cast_fp16, var_2491_cast_fp16, var_2534_cast_fp16, var_2577_cast_fp16, var_2620_cast_fp16, var_2663_cast_fp16, var_2706_cast_fp16, var_2749_cast_fp16, var_2792_cast_fp16, k_cast_fp16))[name = string("input_99_cast_fp16")];
+            int32 var_2859 = const()[name = string("op_2859"), val = int32(0)];
+            bool input_interleave_0 = const()[name = string("input_interleave_0"), val = bool(false)];
+            tensor<fp16, [12, 768, 1, 1500]> input_cast_fp16 = concat(axis = var_2859, interleave = input_interleave_0, values = (var_2378_cast_fp16, var_2421_cast_fp16, var_2464_cast_fp16, var_2507_cast_fp16, var_2550_cast_fp16, var_2593_cast_fp16, var_2636_cast_fp16, var_2679_cast_fp16, var_2722_cast_fp16, var_2765_cast_fp16, var_2808_cast_fp16, v_cast_fp16))[name = string("input_cast_fp16")];
+            tensor<int32, [8]> var_2866_pad_0 = const()[name = string("op_2866_pad_0"), val = tensor<int32, [8]>([0, 0, 0, 0, 0, 0, 0, 36])];
+            string var_2866_mode_0 = const()[name = string("op_2866_mode_0"), val = string("constant")];
+            fp16 const_13_to_fp16 = const()[name = string("const_13_to_fp16"), val = fp16(0x0p+0)];
+            tensor<fp16, [12, 768, 1, 1536]> encoder_attn_key_cache = pad(constant_val = const_13_to_fp16, mode = var_2866_mode_0, pad = var_2866_pad_0, x = input_99_cast_fp16)[name = string("op_2866_cast_fp16")];
+            tensor<int32, [8]> var_2872_pad_0 = const()[name = string("op_2872_pad_0"), val = tensor<int32, [8]>([0, 0, 0, 0, 0, 0, 0, 36])];
+            string var_2872_mode_0 = const()[name = string("op_2872_mode_0"), val = string("constant")];
+            fp16 const_14_to_fp16 = const()[name = string("const_14_to_fp16"), val = fp16(0x0p+0)];
+            tensor<fp16, [12, 768, 1, 1536]> encoder_attn_value_cache = pad(constant_val = const_14_to_fp16, mode = var_2872_mode_0, pad = var_2872_pad_0, x = input_cast_fp16)[name = string("op_2872_cast_fp16")];
+        } -> (encoder_output_embeds, encoder_attn_key_cache, encoder_attn_value_cache);
+}
\ No newline at end of file
diff --git a/openai_whisper-small_216MB/AudioEncoder.mlmodelc/model.mlmodel b/openai_whisper-small_216MB/AudioEncoder.mlmodelc/model.mlmodel
new file mode 100644
index 0000000000000000000000000000000000000000..6f9264dd72b78f39cdcdf38e2396625c34f30ed6
--- /dev/null
+++ b/openai_whisper-small_216MB/AudioEncoder.mlmodelc/model.mlmodel
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1860cdf3b8cb60f09c2ba261670bac3f5dee04f59c47908ceda7b3c7c32b7c6b
+size 370367
diff --git a/openai_whisper-small_216MB/AudioEncoder.mlmodelc/weights/weight.bin b/openai_whisper-small_216MB/AudioEncoder.mlmodelc/weights/weight.bin
new file mode 100644
index 0000000000000000000000000000000000000000..f472fdbfa1f215b0e2ea77361c713d17fd370401
--- /dev/null
+++ b/openai_whisper-small_216MB/AudioEncoder.mlmodelc/weights/weight.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:abd18b26e136cc4cdaae94985b204a64cf3dd1b47db0096d0a137cbd734986fd
+size 71115776
diff --git a/openai_whisper-small_216MB/MelSpectrogram.mlmodelc/analytics/coremldata.bin b/openai_whisper-small_216MB/MelSpectrogram.mlmodelc/analytics/coremldata.bin
new file mode 100644
index 0000000000000000000000000000000000000000..a11fbb2cd75b96eb2120a672afefa298c2ef857b
--- /dev/null
+++ b/openai_whisper-small_216MB/MelSpectrogram.mlmodelc/analytics/coremldata.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:efc05e563ee0c556e3f578e04be5fb67b4e7520124403f2561f39102f0f2b33d
+size 243
diff --git a/openai_whisper-small_216MB/MelSpectrogram.mlmodelc/coremldata.bin b/openai_whisper-small_216MB/MelSpectrogram.mlmodelc/coremldata.bin
new file mode 100644
index 0000000000000000000000000000000000000000..a3544b6644c1af93ca6bdabb67a1c51e80eaa552
--- /dev/null
+++ b/openai_whisper-small_216MB/MelSpectrogram.mlmodelc/coremldata.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e4ef11ea703011eab03287ec661f999e19c2c78cf67d531b5e6afa02e18f913d
+size 328
diff --git a/openai_whisper-small_216MB/MelSpectrogram.mlmodelc/metadata.json b/openai_whisper-small_216MB/MelSpectrogram.mlmodelc/metadata.json
new file mode 100644
index 0000000000000000000000000000000000000000..14c5b740c647e540d629abcaf72ee1cfddce2c7c
--- /dev/null
+++ b/openai_whisper-small_216MB/MelSpectrogram.mlmodelc/metadata.json
@@ -0,0 +1,74 @@
+[
+  {
+    "metadataOutputVersion" : "3.0",
+    "storagePrecision" : "Float16",
+    "outputSchema" : [
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1 × 80 × 1 × 3000)",
+        "shortDescription" : "",
+        "shape" : "[1, 80, 1, 3000]",
+        "name" : "melspectrogram_features",
+        "type" : "MultiArray"
+      }
+    ],
+    "modelParameters" : [
+
+    ],
+    "specificationVersion" : 9,
+    "mlProgramOperationTypeHistogram" : {
+      "Ios18.mul" : 2,
+      "Ios18.square" : 2,
+      "Ios18.conv" : 2,
+      "Ios18.matmul" : 1,
+      "Ios18.expandDims" : 4,
+      "Ios18.sub" : 1,
+      "Ios18.log" : 1,
+      "Ios18.add" : 3,
+      "Ios18.sliceByIndex" : 1,
+      "Ios18.maximum" : 1,
+      "Ios18.squeeze" : 2,
+      "Ios18.reshape" : 2,
+      "Ios16.reduceMax" : 1,
+      "Identity" : 1,
+      "Pad" : 1
+    },
+    "computePrecision" : "Mixed (Float16, Float32, Int32)",
+    "isUpdatable" : "0",
+    "stateSchema" : [
+
+    ],
+    "availability" : {
+      "macOS" : "15.0",
+      "tvOS" : "18.0",
+      "visionOS" : "2.0",
+      "watchOS" : "11.0",
+      "iOS" : "18.0",
+      "macCatalyst" : "18.0"
+    },
+    "modelType" : {
+      "name" : "MLModelType_mlProgram"
+    },
+    "userDefinedMetadata" : {
+      "com.github.apple.coremltools.source_dialect" : "TorchScript",
+      "com.github.apple.coremltools.version" : "8.0",
+      "com.github.apple.coremltools.source" : "torch==2.5.1"
+    },
+    "inputSchema" : [
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 480000)",
+        "shortDescription" : "",
+        "shape" : "[480000]",
+        "name" : "audio",
+        "type" : "MultiArray"
+      }
+    ],
+    "generatedClassName" : "MelSpectrogram",
+    "method" : "predict"
+  }
+]
\ No newline at end of file
diff --git a/openai_whisper-small_216MB/MelSpectrogram.mlmodelc/model.mil b/openai_whisper-small_216MB/MelSpectrogram.mlmodelc/model.mil
new file mode 100644
index 0000000000000000000000000000000000000000..e76a2f7f38c466d22bd0ffdc27ef38a01dd51c37
--- /dev/null
+++ b/openai_whisper-small_216MB/MelSpectrogram.mlmodelc/model.mil
@@ -0,0 +1,66 @@
+program(1.3)
+[buildInfo = dict<string, string>({{"coremlc-component-MIL", "3400.43.1"}, {"coremlc-version", "3400.58.2"}, {"coremltools-component-torch", "2.5.1"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "8.0"}})]
+{
+    func main<ios18>(tensor<fp16, [480000]> audio) {
+            tensor<int32, [3]> var_10 = const()[name = string("op_10"), val = tensor<int32, [3]>([1, 1, 480000])];
+            tensor<fp16, [1, 1, 480000]> input_1_cast_fp16 = reshape(shape = var_10, x = audio)[name = string("input_1_cast_fp16")];
+            tensor<int32, [6]> input_3_pad_0 = const()[name = string("input_3_pad_0"), val = tensor<int32, [6]>([0, 0, 0, 0, 200, 200])];
+            string input_3_mode_0 = const()[name = string("input_3_mode_0"), val = string("reflect")];
+            fp16 const_1_to_fp16 = const()[name = string("const_1_to_fp16"), val = fp16(0x0p+0)];
+            tensor<fp16, [1, 1, 480400]> input_3_cast_fp16 = pad(constant_val = const_1_to_fp16, mode = input_3_mode_0, pad = input_3_pad_0, x = input_1_cast_fp16)[name = string("input_3_cast_fp16")];
+            tensor<int32, [1]> var_22 = const()[name = string("op_22"), val = tensor<int32, [1]>([480400])];
+            tensor<fp16, [480400]> input_cast_fp16 = reshape(shape = var_22, x = input_3_cast_fp16)[name = string("input_cast_fp16")];
+            tensor<int32, [1]> expand_dims_0_axes_0 = const()[name = string("expand_dims_0_axes_0"), val = tensor<int32, [1]>([0])];
+            tensor<fp16, [1, 480400]> expand_dims_0_cast_fp16 = expand_dims(axes = expand_dims_0_axes_0, x = input_cast_fp16)[name = string("expand_dims_0_cast_fp16")];
+            tensor<int32, [1]> expand_dims_3 = const()[name = string("expand_dims_3"), val = tensor<int32, [1]>([160])];
+            tensor<int32, [1]> expand_dims_4_axes_0 = const()[name = string("expand_dims_4_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 480400]> expand_dims_4_cast_fp16 = expand_dims(axes = expand_dims_4_axes_0, x = expand_dims_0_cast_fp16)[name = string("expand_dims_4_cast_fp16")];
+            string conv_0_pad_type_0 = const()[name = string("conv_0_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> conv_0_pad_0 = const()[name = string("conv_0_pad_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<int32, [1]> conv_0_dilations_0 = const()[name = string("conv_0_dilations_0"), val = tensor<int32, [1]>([1])];
+            int32 conv_0_groups_0 = const()[name = string("conv_0_groups_0"), val = int32(1)];
+            tensor<fp16, [201, 1, 400]> expand_dims_1_to_fp16 = const()[name = string("expand_dims_1_to_fp16"), val = tensor<fp16, [201, 1, 400]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64)))];
+            tensor<fp16, [1, 201, 3001]> conv_0_cast_fp16 = conv(dilations = conv_0_dilations_0, groups = conv_0_groups_0, pad = conv_0_pad_0, pad_type = conv_0_pad_type_0, strides = expand_dims_3, weight = expand_dims_1_to_fp16, x = expand_dims_4_cast_fp16)[name = string("conv_0_cast_fp16")];
+            string conv_1_pad_type_0 = const()[name = string("conv_1_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> conv_1_pad_0 = const()[name = string("conv_1_pad_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<int32, [1]> conv_1_dilations_0 = const()[name = string("conv_1_dilations_0"), val = tensor<int32, [1]>([1])];
+            int32 conv_1_groups_0 = const()[name = string("conv_1_groups_0"), val = int32(1)];
+            tensor<fp16, [201, 1, 400]> expand_dims_2_to_fp16 = const()[name = string("expand_dims_2_to_fp16"), val = tensor<fp16, [201, 1, 400]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(160960)))];
+            tensor<fp16, [1, 201, 3001]> conv_1_cast_fp16 = conv(dilations = conv_1_dilations_0, groups = conv_1_groups_0, pad = conv_1_pad_0, pad_type = conv_1_pad_type_0, strides = expand_dims_3, weight = expand_dims_2_to_fp16, x = expand_dims_4_cast_fp16)[name = string("conv_1_cast_fp16")];
+            tensor<int32, [1]> squeeze_0_axes_0 = const()[name = string("squeeze_0_axes_0"), val = tensor<int32, [1]>([0])];
+            tensor<fp16, [201, 3001]> squeeze_0_cast_fp16 = squeeze(axes = squeeze_0_axes_0, x = conv_0_cast_fp16)[name = string("squeeze_0_cast_fp16")];
+            tensor<int32, [1]> squeeze_1_axes_0 = const()[name = string("squeeze_1_axes_0"), val = tensor<int32, [1]>([0])];
+            tensor<fp16, [201, 3001]> squeeze_1_cast_fp16 = squeeze(axes = squeeze_1_axes_0, x = conv_1_cast_fp16)[name = string("squeeze_1_cast_fp16")];
+            tensor<fp16, [201, 3001]> square_0_cast_fp16 = square(x = squeeze_0_cast_fp16)[name = string("square_0_cast_fp16")];
+            tensor<fp16, [201, 3001]> square_1_cast_fp16 = square(x = squeeze_1_cast_fp16)[name = string("square_1_cast_fp16")];
+            tensor<fp16, [201, 3001]> add_1_cast_fp16 = add(x = square_0_cast_fp16, y = square_1_cast_fp16)[name = string("add_1_cast_fp16")];
+            tensor<fp16, [201, 3001]> magnitudes_1_cast_fp16 = identity(x = add_1_cast_fp16)[name = string("magnitudes_1_cast_fp16")];
+            tensor<int32, [2]> magnitudes_begin_0 = const()[name = string("magnitudes_begin_0"), val = tensor<int32, [2]>([0, 0])];
+            tensor<int32, [2]> magnitudes_end_0 = const()[name = string("magnitudes_end_0"), val = tensor<int32, [2]>([201, 3000])];
+            tensor<bool, [2]> magnitudes_end_mask_0 = const()[name = string("magnitudes_end_mask_0"), val = tensor<bool, [2]>([true, false])];
+            tensor<fp16, [201, 3000]> magnitudes_cast_fp16 = slice_by_index(begin = magnitudes_begin_0, end = magnitudes_end_0, end_mask = magnitudes_end_mask_0, x = magnitudes_1_cast_fp16)[name = string("magnitudes_cast_fp16")];
+            bool mel_spec_1_transpose_x_0 = const()[name = string("mel_spec_1_transpose_x_0"), val = bool(false)];
+            bool mel_spec_1_transpose_y_0 = const()[name = string("mel_spec_1_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [80, 201]> mel_filters_to_fp16 = const()[name = string("mel_filters_to_fp16"), val = tensor<fp16, [80, 201]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(321856)))];
+            tensor<fp16, [80, 3000]> mel_spec_1_cast_fp16 = matmul(transpose_x = mel_spec_1_transpose_x_0, transpose_y = mel_spec_1_transpose_y_0, x = mel_filters_to_fp16, y = magnitudes_cast_fp16)[name = string("mel_spec_1_cast_fp16")];
+            fp16 var_41_to_fp16 = const()[name = string("op_41_to_fp16"), val = fp16(0x1p-24)];
+            tensor<fp16, [80, 3000]> mel_spec_cast_fp16 = add(x = mel_spec_1_cast_fp16, y = var_41_to_fp16)[name = string("mel_spec_cast_fp16")];
+            fp32 log_0_epsilon_0 = const()[name = string("log_0_epsilon_0"), val = fp32(0x1p-149)];
+            tensor<fp16, [80, 3000]> log_0_cast_fp16 = log(epsilon = log_0_epsilon_0, x = mel_spec_cast_fp16)[name = string("log_0_cast_fp16")];
+            fp16 mul_0_y_0_to_fp16 = const()[name = string("mul_0_y_0_to_fp16"), val = fp16(0x1.bccp-2)];
+            tensor<fp16, [80, 3000]> mul_0_cast_fp16 = mul(x = log_0_cast_fp16, y = mul_0_y_0_to_fp16)[name = string("mul_0_cast_fp16")];
+            bool var_44_keep_dims_0 = const()[name = string("op_44_keep_dims_0"), val = bool(false)];
+            fp16 var_44_cast_fp16 = reduce_max(keep_dims = var_44_keep_dims_0, x = mul_0_cast_fp16)[name = string("op_44_cast_fp16")];
+            fp16 var_46_to_fp16 = const()[name = string("op_46_to_fp16"), val = fp16(0x1p+3)];
+            fp16 var_47_cast_fp16 = sub(x = var_44_cast_fp16, y = var_46_to_fp16)[name = string("op_47_cast_fp16")];
+            tensor<fp16, [80, 3000]> log_spec_3_cast_fp16 = maximum(x = mul_0_cast_fp16, y = var_47_cast_fp16)[name = string("log_spec_3_cast_fp16")];
+            fp16 var_50_to_fp16 = const()[name = string("op_50_to_fp16"), val = fp16(0x1p+2)];
+            tensor<fp16, [80, 3000]> var_51_cast_fp16 = add(x = log_spec_3_cast_fp16, y = var_50_to_fp16)[name = string("op_51_cast_fp16")];
+            fp16 _inversed_log_spec_y_0_to_fp16 = const()[name = string("_inversed_log_spec_y_0_to_fp16"), val = fp16(0x1p-2)];
+            tensor<fp16, [80, 3000]> _inversed_log_spec_cast_fp16 = mul(x = var_51_cast_fp16, y = _inversed_log_spec_y_0_to_fp16)[name = string("_inversed_log_spec_cast_fp16")];
+            tensor<int32, [1]> var_55_axes_0 = const()[name = string("op_55_axes_0"), val = tensor<int32, [1]>([0])];
+            tensor<fp16, [1, 80, 3000]> var_55_cast_fp16 = expand_dims(axes = var_55_axes_0, x = _inversed_log_spec_cast_fp16)[name = string("op_55_cast_fp16")];
+            tensor<int32, [1]> var_62_axes_0 = const()[name = string("op_62_axes_0"), val = tensor<int32, [1]>([2])];
+            tensor<fp16, [1, 80, 1, 3000]> melspectrogram_features = expand_dims(axes = var_62_axes_0, x = var_55_cast_fp16)[name = string("op_62_cast_fp16")];
+        } -> (melspectrogram_features);
+}
\ No newline at end of file
diff --git a/openai_whisper-small_216MB/MelSpectrogram.mlmodelc/weights/weight.bin b/openai_whisper-small_216MB/MelSpectrogram.mlmodelc/weights/weight.bin
new file mode 100644
index 0000000000000000000000000000000000000000..f7d28ffac464e9e7086a526930f0059187de8d01
--- /dev/null
+++ b/openai_whisper-small_216MB/MelSpectrogram.mlmodelc/weights/weight.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:801024dbc7a89c677be1f8b285de3409e35f7d1786c9c8d9d0d6842ac57a1c83
+size 354080
diff --git a/openai_whisper-small_216MB/TextDecoder.mlmodelc/analytics/coremldata.bin b/openai_whisper-small_216MB/TextDecoder.mlmodelc/analytics/coremldata.bin
new file mode 100644
index 0000000000000000000000000000000000000000..51a18b7bf5a50e6939d9b4c716b06e1aaf18e393
--- /dev/null
+++ b/openai_whisper-small_216MB/TextDecoder.mlmodelc/analytics/coremldata.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7df280e3624592b68f28b53486f5b5774bc21282f6a172d6babfcfc2f5fdf139
+size 243
diff --git a/openai_whisper-small_216MB/TextDecoder.mlmodelc/coremldata.bin b/openai_whisper-small_216MB/TextDecoder.mlmodelc/coremldata.bin
new file mode 100644
index 0000000000000000000000000000000000000000..2e6f8ebf3fd61b7d0e989054d1e1d4161ccd8e95
--- /dev/null
+++ b/openai_whisper-small_216MB/TextDecoder.mlmodelc/coremldata.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3015f3429f673bc4a311b034f2dd12abd86a84e3653afa6f166654e6e6478aeb
+size 754
diff --git a/openai_whisper-small_216MB/TextDecoder.mlmodelc/metadata.json b/openai_whisper-small_216MB/TextDecoder.mlmodelc/metadata.json
new file mode 100644
index 0000000000000000000000000000000000000000..bf1201bf9319d11622db738943459b021c07a106
--- /dev/null
+++ b/openai_whisper-small_216MB/TextDecoder.mlmodelc/metadata.json
@@ -0,0 +1,185 @@
+[
+  {
+    "metadataOutputVersion" : "3.0",
+    "storagePrecision" : "Mixed (Float16, Palettized (4 bits), Sparse)",
+    "outputSchema" : [
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1 × 1 × 51865)",
+        "shortDescription" : "",
+        "shape" : "[1, 1, 51865]",
+        "name" : "logits",
+        "type" : "MultiArray"
+      },
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1 × 9216 × 1 × 1)",
+        "shortDescription" : "",
+        "shape" : "[1, 9216, 1, 1]",
+        "name" : "key_cache_updates",
+        "type" : "MultiArray"
+      },
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1 × 9216 × 1 × 1)",
+        "shortDescription" : "",
+        "shape" : "[1, 9216, 1, 1]",
+        "name" : "value_cache_updates",
+        "type" : "MultiArray"
+      },
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1 × 1536)",
+        "shortDescription" : "",
+        "shape" : "[1, 1536]",
+        "name" : "alignment_heads_weights",
+        "type" : "MultiArray"
+      }
+    ],
+    "modelParameters" : [
+
+    ],
+    "specificationVersion" : 9,
+    "mlProgramOperationTypeHistogram" : {
+      "Ios18.expandDims" : 8,
+      "Ios18.softmax" : 24,
+      "Ios18.mul" : 48,
+      "Ios18.matmul" : 48,
+      "Ios18.batchNorm" : 37,
+      "Ios16.reduceMean" : 1,
+      "Split" : 2,
+      "Ios18.readState" : 5,
+      "Ios18.gather" : 3,
+      "Ios18.add" : 182,
+      "Ios18.layerNorm" : 37,
+      "Ios18.reshape" : 96,
+      "Ios18.constexprLutToDense" : 96,
+      "Ios18.constexprSparseToDense" : 97,
+      "Ios18.conv" : 192,
+      "Ios18.gelu" : 12,
+      "Ios18.linear" : 1,
+      "Ios18.cast" : 1,
+      "Ios18.transpose" : 1,
+      "Ios18.concat" : 3,
+      "Ios18.sliceByIndex" : 44,
+      "Ios18.squeeze" : 1
+    },
+    "computePrecision" : "Mixed (Float16, Int32, UInt16)",
+    "isUpdatable" : "0",
+    "stateSchema" : [
+      {
+        "dataType" : "Float16",
+        "isOptional" : "0",
+        "formattedType" : "State (Float16 1 × 1536)",
+        "shortDescription" : "",
+        "shape" : "[1, 1536]",
+        "name" : "encoder_attn_key_padding_mask",
+        "type" : "State"
+      },
+      {
+        "dataType" : "Float16",
+        "isOptional" : "0",
+        "formattedType" : "State (Float16 12 × 768 × 1 × 1536)",
+        "shortDescription" : "",
+        "shape" : "[12, 768, 1, 1536]",
+        "name" : "encoder_attn_key_cache",
+        "type" : "State"
+      },
+      {
+        "dataType" : "Float16",
+        "isOptional" : "0",
+        "formattedType" : "State (Float16 12 × 768 × 1 × 1536)",
+        "shortDescription" : "",
+        "shape" : "[12, 768, 1, 1536]",
+        "name" : "encoder_attn_value_cache",
+        "type" : "State"
+      },
+      {
+        "dataType" : "Float16",
+        "isOptional" : "0",
+        "formattedType" : "State (Float16 12 × 768 × 1 × 448)",
+        "shortDescription" : "",
+        "shape" : "[12, 768, 1, 448]",
+        "name" : "self_attn_key_cache",
+        "type" : "State"
+      },
+      {
+        "dataType" : "Float16",
+        "isOptional" : "0",
+        "formattedType" : "State (Float16 12 × 768 × 1 × 448)",
+        "shortDescription" : "",
+        "shape" : "[12, 768, 1, 448]",
+        "name" : "self_attn_value_cache",
+        "type" : "State"
+      }
+    ],
+    "availability" : {
+      "macOS" : "15.0",
+      "tvOS" : "18.0",
+      "visionOS" : "2.0",
+      "watchOS" : "11.0",
+      "iOS" : "18.0",
+      "macCatalyst" : "18.0"
+    },
+    "modelType" : {
+      "name" : "MLModelType_mlProgram"
+    },
+    "userDefinedMetadata" : {
+      "com.github.apple.coremltools.source_dialect" : "TorchScript",
+      "com.github.apple.coremltools.source" : "torch==2.5.1",
+      "com.github.apple.coremltools.version" : "8.0"
+    },
+    "inputSchema" : [
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Int32",
+        "formattedType" : "MultiArray (Int32 1)",
+        "shortDescription" : "",
+        "shape" : "[1]",
+        "name" : "input_ids",
+        "type" : "MultiArray"
+      },
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Int32",
+        "formattedType" : "MultiArray (Int32 1)",
+        "shortDescription" : "",
+        "shape" : "[1]",
+        "name" : "cache_length",
+        "type" : "MultiArray"
+      },
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1 × 448)",
+        "shortDescription" : "",
+        "shape" : "[1, 448]",
+        "name" : "kv_cache_update_mask",
+        "type" : "MultiArray"
+      },
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float16",
+        "formattedType" : "MultiArray (Float16 1 × 448)",
+        "shortDescription" : "",
+        "shape" : "[1, 448]",
+        "name" : "decoder_key_padding_mask",
+        "type" : "MultiArray"
+      }
+    ],
+    "generatedClassName" : "TextDecoderStateful_mixedBitPalettized_4_bit",
+    "method" : "predict"
+  }
+]
\ No newline at end of file
diff --git a/openai_whisper-small_216MB/TextDecoder.mlmodelc/model.mil b/openai_whisper-small_216MB/TextDecoder.mlmodelc/model.mil
new file mode 100644
index 0000000000000000000000000000000000000000..c3f27f82761c943a4b1c58cea6f5254d4b114be3
--- /dev/null
+++ b/openai_whisper-small_216MB/TextDecoder.mlmodelc/model.mil
@@ -0,0 +1,2593 @@
+program(1.3)
+[buildInfo = dict<string, string>({{"coremlc-component-MIL", "3400.43.1"}, {"coremlc-version", "3400.58.2"}})]
+{
+    func main<ios18>(tensor<int32, [1]> cache_length, tensor<fp16, [1, 448]> decoder_key_padding_mask, state<tensor<fp16, [12, 768, 1, 1536]>> encoder_attn_key_cache, state<tensor<fp16, [1, 1536]>> encoder_attn_key_padding_mask, state<tensor<fp16, [12, 768, 1, 1536]>> encoder_attn_value_cache, tensor<int32, [1]> input_ids, tensor<fp16, [1, 448]> kv_cache_update_mask, state<tensor<fp16, [12, 768, 1, 448]>> self_attn_key_cache, state<tensor<fp16, [12, 768, 1, 448]>> self_attn_value_cache) {
+            int32 var_42_axis_0 = const()[name = string("op_42_axis_0"), val = int32(0)];
+            int32 var_42_batch_dims_0 = const()[name = string("op_42_batch_dims_0"), val = int32(0)];
+            bool var_42_validate_indices_0 = const()[name = string("op_42_validate_indices_0"), val = bool(false)];
+            tensor<fp16, [51865, 768]> embed_tokens_weight_to_fp16 = const()[name = string("embed_tokens_weight_to_fp16"), val = tensor<fp16, [51865, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64)))];
+            tensor<fp16, [1, 768]> var_42_cast_fp16 = gather(axis = var_42_axis_0, batch_dims = var_42_batch_dims_0, indices = input_ids, validate_indices = var_42_validate_indices_0, x = embed_tokens_weight_to_fp16)[name = string("op_42_cast_fp16")];
+            int32 var_49_axis_0 = const()[name = string("op_49_axis_0"), val = int32(0)];
+            int32 var_49_batch_dims_0 = const()[name = string("op_49_batch_dims_0"), val = int32(0)];
+            bool var_49_validate_indices_0 = const()[name = string("op_49_validate_indices_0"), val = bool(false)];
+            tensor<fp16, [448, 768]> embed_positions_inlier_module_weight_to_fp16 = const()[name = string("embed_positions_inlier_module_weight_to_fp16"), val = tensor<fp16, [448, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(79664768)))];
+            string cache_length_to_uint16_dtype_0 = const()[name = string("cache_length_to_uint16_dtype_0"), val = string("uint16")];
+            tensor<uint16, [1]> cache_length_to_uint16 = cast(dtype = cache_length_to_uint16_dtype_0, x = cache_length)[name = string("cast_0")];
+            tensor<fp16, [1, 768]> var_49_cast_fp16_cast_uint16 = gather(axis = var_49_axis_0, batch_dims = var_49_batch_dims_0, indices = cache_length_to_uint16, validate_indices = var_49_validate_indices_0, x = embed_positions_inlier_module_weight_to_fp16)[name = string("op_49_cast_fp16_cast_uint16")];
+            int32 var_51_axis_0 = const()[name = string("op_51_axis_0"), val = int32(0)];
+            int32 var_51_batch_dims_0 = const()[name = string("op_51_batch_dims_0"), val = int32(0)];
+            bool var_51_validate_indices_0 = const()[name = string("op_51_validate_indices_0"), val = bool(false)];
+            tensor<fp16, [448, 768]> embed_positions_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [448, 768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(80361920))), nonzero_data = tensor<fp16, [4429]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(80352960))))[name = string("embed_positions_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768]> var_51_cast_fp16_cast_uint16 = gather(axis = var_51_axis_0, batch_dims = var_51_batch_dims_0, indices = cache_length_to_uint16, validate_indices = var_51_validate_indices_0, x = embed_positions_outlier_module_weight_to_fp16_sparsified)[name = string("op_51_cast_fp16_cast_uint16")];
+            tensor<fp16, [1, 768]> var_52_cast_fp16 = add(x = var_49_cast_fp16_cast_uint16, y = var_51_cast_fp16_cast_uint16)[name = string("op_52_cast_fp16")];
+            tensor<fp16, [1, 768]> hidden_states_1_cast_fp16 = add(x = var_42_cast_fp16, y = var_52_cast_fp16)[name = string("hidden_states_1_cast_fp16")];
+            tensor<int32, [1]> var_66_axes_0 = const()[name = string("op_66_axes_0"), val = tensor<int32, [1]>([2])];
+            tensor<fp16, [1, 768, 1]> var_66_cast_fp16 = expand_dims(axes = var_66_axes_0, x = hidden_states_1_cast_fp16)[name = string("op_66_cast_fp16")];
+            tensor<int32, [1]> inputs_1_axes_0 = const()[name = string("inputs_1_axes_0"), val = tensor<int32, [1]>([3])];
+            tensor<fp16, [1, 768, 1, 1]> inputs_1_cast_fp16 = expand_dims(axes = inputs_1_axes_0, x = var_66_cast_fp16)[name = string("inputs_1_cast_fp16")];
+            tensor<fp16, [12, 768, 1, 448]> read_state_0 = read_state(input = self_attn_key_cache)[name = string("read_state_0")];
+            tensor<int32, [12]> tile_0 = const()[name = string("tile_0"), val = tensor<int32, [12]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(80404992)))];
+            int32 var_71_axis_0 = const()[name = string("op_71_axis_0"), val = int32(0)];
+            tensor<fp16, [1, 768, 1, 448]> var_71_cast_fp16_0, tensor<fp16, [1, 768, 1, 448]> var_71_cast_fp16_1, tensor<fp16, [1, 768, 1, 448]> var_71_cast_fp16_2, tensor<fp16, [1, 768, 1, 448]> var_71_cast_fp16_3, tensor<fp16, [1, 768, 1, 448]> var_71_cast_fp16_4, tensor<fp16, [1, 768, 1, 448]> var_71_cast_fp16_5, tensor<fp16, [1, 768, 1, 448]> var_71_cast_fp16_6, tensor<fp16, [1, 768, 1, 448]> var_71_cast_fp16_7, tensor<fp16, [1, 768, 1, 448]> var_71_cast_fp16_8, tensor<fp16, [1, 768, 1, 448]> var_71_cast_fp16_9, tensor<fp16, [1, 768, 1, 448]> var_71_cast_fp16_10, tensor<fp16, [1, 768, 1, 448]> var_71_cast_fp16_11 = split(axis = var_71_axis_0, split_sizes = tile_0, x = read_state_0)[name = string("op_71_cast_fp16")];
+            tensor<fp16, [12, 768, 1, 448]> read_state_1 = read_state(input = self_attn_value_cache)[name = string("read_state_1")];
+            tensor<int32, [12]> tile_1 = const()[name = string("tile_1"), val = tensor<int32, [12]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(80405120)))];
+            int32 var_86_axis_0 = const()[name = string("op_86_axis_0"), val = int32(0)];
+            tensor<fp16, [1, 768, 1, 448]> var_86_cast_fp16_0, tensor<fp16, [1, 768, 1, 448]> var_86_cast_fp16_1, tensor<fp16, [1, 768, 1, 448]> var_86_cast_fp16_2, tensor<fp16, [1, 768, 1, 448]> var_86_cast_fp16_3, tensor<fp16, [1, 768, 1, 448]> var_86_cast_fp16_4, tensor<fp16, [1, 768, 1, 448]> var_86_cast_fp16_5, tensor<fp16, [1, 768, 1, 448]> var_86_cast_fp16_6, tensor<fp16, [1, 768, 1, 448]> var_86_cast_fp16_7, tensor<fp16, [1, 768, 1, 448]> var_86_cast_fp16_8, tensor<fp16, [1, 768, 1, 448]> var_86_cast_fp16_9, tensor<fp16, [1, 768, 1, 448]> var_86_cast_fp16_10, tensor<fp16, [1, 768, 1, 448]> var_86_cast_fp16_11 = split(axis = var_86_axis_0, split_sizes = tile_1, x = read_state_1)[name = string("op_86_cast_fp16")];
+            tensor<fp16, [12, 768, 1, 1536]> read_state_2 = read_state(input = encoder_attn_key_cache)[name = string("read_state_2")];
+            tensor<int32, [4]> obj_17_begin_0 = const()[name = string("obj_17_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> obj_17_end_0 = const()[name = string("obj_17_end_0"), val = tensor<int32, [4]>([1, 768, 1, 1536])];
+            tensor<bool, [4]> obj_17_end_mask_0 = const()[name = string("obj_17_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<fp16, [1, 768, 1, 1536]> obj_17_cast_fp16 = slice_by_index(begin = obj_17_begin_0, end = obj_17_end_0, end_mask = obj_17_end_mask_0, x = read_state_2)[name = string("obj_17_cast_fp16")];
+            tensor<fp16, [12, 768, 1, 1536]> read_state_3 = read_state(input = encoder_attn_value_cache)[name = string("read_state_3")];
+            tensor<int32, [4]> obj_19_begin_0 = const()[name = string("obj_19_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> obj_19_end_0 = const()[name = string("obj_19_end_0"), val = tensor<int32, [4]>([1, 768, 1, 1536])];
+            tensor<bool, [4]> obj_19_end_mask_0 = const()[name = string("obj_19_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<fp16, [1, 768, 1, 1536]> obj_19_cast_fp16 = slice_by_index(begin = obj_19_begin_0, end = obj_19_end_0, end_mask = obj_19_end_mask_0, x = read_state_3)[name = string("obj_19_cast_fp16")];
+            int32 var_114 = const()[name = string("op_114"), val = int32(3)];
+            tensor<int32, [1]> out_1_axes_0 = const()[name = string("out_1_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_139_to_fp16 = const()[name = string("op_139_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> out_1_cast_fp16 = layer_norm(axes = out_1_axes_0, epsilon = var_139_to_fp16, x = inputs_1_cast_fp16)[name = string("out_1_cast_fp16")];
+            tensor<fp16, [768]> obj_5_mean_0_to_fp16 = const()[name = string("obj_5_mean_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(80405248)))];
+            tensor<fp16, [768]> obj_5_variance_0_to_fp16 = const()[name = string("obj_5_variance_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(80406848)))];
+            tensor<fp16, [768]> obj_5_gamma_0_to_fp16 = const()[name = string("obj_5_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(80408448)))];
+            tensor<fp16, [768]> obj_5_beta_0_to_fp16 = const()[name = string("obj_5_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(80410048)))];
+            fp16 obj_5_epsilon_0_to_fp16 = const()[name = string("obj_5_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> obj_5_cast_fp16 = batch_norm(beta = obj_5_beta_0_to_fp16, epsilon = obj_5_epsilon_0_to_fp16, gamma = obj_5_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_1_cast_fp16)[name = string("obj_5_cast_fp16")];
+            string var_161_pad_type_0 = const()[name = string("op_161_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_161_strides_0 = const()[name = string("op_161_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_161_pad_0 = const()[name = string("op_161_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_161_dilations_0 = const()[name = string("op_161_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_161_groups_0 = const()[name = string("op_161_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_0_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(80411648))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(80706624))))[name = string("layers_0_self_attn_q_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_0_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_0_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(80706752)))];
+            tensor<fp16, [1, 768, 1, 1]> var_161_cast_fp16 = conv(bias = layers_0_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_161_dilations_0, groups = var_161_groups_0, pad = var_161_pad_0, pad_type = var_161_pad_type_0, strides = var_161_strides_0, weight = layers_0_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_5_cast_fp16)[name = string("op_161_cast_fp16")];
+            string var_167_pad_type_0 = const()[name = string("op_167_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_167_strides_0 = const()[name = string("op_167_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_167_pad_0 = const()[name = string("op_167_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_167_dilations_0 = const()[name = string("op_167_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_167_groups_0 = const()[name = string("op_167_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_0_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(80719808))), nonzero_data = tensor<fp16, [5693]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(80708352))))[name = string("layers_0_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1]> var_167_cast_fp16 = conv(dilations = var_167_dilations_0, groups = var_167_groups_0, pad = var_167_pad_0, pad_type = var_167_pad_type_0, strides = var_167_strides_0, weight = layers_0_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_5_cast_fp16)[name = string("op_167_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> query_1_cast_fp16 = add(x = var_161_cast_fp16, y = var_167_cast_fp16)[name = string("query_1_cast_fp16")];
+            string var_176_pad_type_0 = const()[name = string("op_176_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_176_strides_0 = const()[name = string("op_176_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_176_pad_0 = const()[name = string("op_176_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_176_dilations_0 = const()[name = string("op_176_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_176_groups_0 = const()[name = string("op_176_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_0_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(80793600))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(81088576))))[name = string("layers_0_self_attn_k_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1, 768, 1, 1]> var_176_cast_fp16 = conv(dilations = var_176_dilations_0, groups = var_176_groups_0, pad = var_176_pad_0, pad_type = var_176_pad_type_0, strides = var_176_strides_0, weight = layers_0_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_5_cast_fp16)[name = string("op_176_cast_fp16")];
+            string var_182_pad_type_0 = const()[name = string("op_182_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_182_strides_0 = const()[name = string("op_182_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_182_pad_0 = const()[name = string("op_182_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_182_dilations_0 = const()[name = string("op_182_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_182_groups_0 = const()[name = string("op_182_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_0_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(81100800))), nonzero_data = tensor<fp16, [5987]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(81088704))))[name = string("layers_0_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1]> var_182_cast_fp16 = conv(dilations = var_182_dilations_0, groups = var_182_groups_0, pad = var_182_pad_0, pad_type = var_182_pad_type_0, strides = var_182_strides_0, weight = layers_0_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_5_cast_fp16)[name = string("op_182_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> current_key_1_cast_fp16 = add(x = var_176_cast_fp16, y = var_182_cast_fp16)[name = string("current_key_1_cast_fp16")];
+            string var_192_pad_type_0 = const()[name = string("op_192_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_192_strides_0 = const()[name = string("op_192_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_192_pad_0 = const()[name = string("op_192_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_192_dilations_0 = const()[name = string("op_192_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_192_groups_0 = const()[name = string("op_192_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_0_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(81174592))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(81469568))))[name = string("layers_0_self_attn_v_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_0_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_0_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(81469696)))];
+            tensor<fp16, [1, 768, 1, 1]> var_192_cast_fp16 = conv(bias = layers_0_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_192_dilations_0, groups = var_192_groups_0, pad = var_192_pad_0, pad_type = var_192_pad_type_0, strides = var_192_strides_0, weight = layers_0_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_5_cast_fp16)[name = string("op_192_cast_fp16")];
+            string var_198_pad_type_0 = const()[name = string("op_198_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_198_strides_0 = const()[name = string("op_198_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_198_pad_0 = const()[name = string("op_198_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_198_dilations_0 = const()[name = string("op_198_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_198_groups_0 = const()[name = string("op_198_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_0_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(81478080))), nonzero_data = tensor<fp16, [3353]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(81471296))))[name = string("layers_0_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1]> var_198_cast_fp16 = conv(dilations = var_198_dilations_0, groups = var_198_groups_0, pad = var_198_pad_0, pad_type = var_198_pad_type_0, strides = var_198_strides_0, weight = layers_0_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_5_cast_fp16)[name = string("op_198_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> current_value_1_cast_fp16 = add(x = var_192_cast_fp16, y = var_198_cast_fp16)[name = string("current_value_1_cast_fp16")];
+            tensor<int32, [1]> var_201_axes_0 = const()[name = string("op_201_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 448]> var_201_cast_fp16 = expand_dims(axes = var_201_axes_0, x = kv_cache_update_mask)[name = string("op_201_cast_fp16")];
+            tensor<int32, [1]> var_202_axes_0 = const()[name = string("op_202_axes_0"), val = tensor<int32, [1]>([2])];
+            tensor<fp16, [1, 1, 1, 448]> var_202_cast_fp16 = expand_dims(axes = var_202_axes_0, x = var_201_cast_fp16)[name = string("op_202_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 448]> var_204_cast_fp16 = mul(x = current_key_1_cast_fp16, y = var_202_cast_fp16)[name = string("op_204_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 448]> key_1_cast_fp16 = add(x = var_71_cast_fp16_0, y = var_204_cast_fp16)[name = string("key_1_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 448]> var_206_cast_fp16 = mul(x = current_value_1_cast_fp16, y = var_202_cast_fp16)[name = string("op_206_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 448]> value_1_cast_fp16 = add(x = var_86_cast_fp16_0, y = var_206_cast_fp16)[name = string("value_1_cast_fp16")];
+            tensor<int32, [4]> var_209 = const()[name = string("op_209"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1]> mh_q_1_cast_fp16 = reshape(shape = var_209, x = query_1_cast_fp16)[name = string("mh_q_1_cast_fp16")];
+            fp16 var_211_to_fp16 = const()[name = string("op_211_to_fp16"), val = fp16(0x1p-3)];
+            tensor<fp16, [1, 12, 64, 1]> var_212_cast_fp16 = mul(x = mh_q_1_cast_fp16, y = var_211_to_fp16)[name = string("op_212_cast_fp16")];
+            tensor<int32, [4]> var_213 = const()[name = string("op_213"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 448]> var_214_cast_fp16 = reshape(shape = var_213, x = key_1_cast_fp16)[name = string("op_214_cast_fp16")];
+            bool mh_w_1_transpose_x_0 = const()[name = string("mh_w_1_transpose_x_0"), val = bool(true)];
+            bool mh_w_1_transpose_y_0 = const()[name = string("mh_w_1_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 12, 1, 448]> mh_w_1_cast_fp16 = matmul(transpose_x = mh_w_1_transpose_x_0, transpose_y = mh_w_1_transpose_y_0, x = var_212_cast_fp16, y = var_214_cast_fp16)[name = string("mh_w_1_cast_fp16")];
+            tensor<int32, [1]> var_218_axes_0 = const()[name = string("op_218_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 448]> var_218_cast_fp16 = expand_dims(axes = var_218_axes_0, x = decoder_key_padding_mask)[name = string("op_218_cast_fp16")];
+            tensor<int32, [1]> var_219_axes_0 = const()[name = string("op_219_axes_0"), val = tensor<int32, [1]>([2])];
+            tensor<fp16, [1, 1, 1, 448]> var_219_cast_fp16 = expand_dims(axes = var_219_axes_0, x = var_218_cast_fp16)[name = string("op_219_cast_fp16")];
+            tensor<fp16, [1, 12, 1, 448]> mh_w_3_cast_fp16 = add(x = mh_w_1_cast_fp16, y = var_219_cast_fp16)[name = string("mh_w_3_cast_fp16")];
+            tensor<fp16, [1, 12, 1, 448]> var_222_cast_fp16 = softmax(axis = var_114, x = mh_w_3_cast_fp16)[name = string("op_222_cast_fp16")];
+            tensor<int32, [4]> var_223 = const()[name = string("op_223"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 448]> var_224_cast_fp16 = reshape(shape = var_223, x = value_1_cast_fp16)[name = string("op_224_cast_fp16")];
+            bool attn_1_transpose_x_0 = const()[name = string("attn_1_transpose_x_0"), val = bool(false)];
+            bool attn_1_transpose_y_0 = const()[name = string("attn_1_transpose_y_0"), val = bool(true)];
+            tensor<fp16, [1, 12, 64, 1]> attn_1_cast_fp16 = matmul(transpose_x = attn_1_transpose_x_0, transpose_y = attn_1_transpose_y_0, x = var_224_cast_fp16, y = var_222_cast_fp16)[name = string("attn_1_cast_fp16")];
+            tensor<int32, [4]> var_227 = const()[name = string("op_227"), val = tensor<int32, [4]>([1, 768, 1, -1])];
+            tensor<fp16, [1, 768, 1, 1]> input_1_cast_fp16 = reshape(shape = var_227, x = attn_1_cast_fp16)[name = string("input_1_cast_fp16")];
+            string var_237_pad_type_0 = const()[name = string("op_237_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_237_strides_0 = const()[name = string("op_237_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_237_pad_0 = const()[name = string("op_237_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_237_dilations_0 = const()[name = string("op_237_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_237_groups_0 = const()[name = string("op_237_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_0_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(81551872))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(81846848))))[name = string("layers_0_self_attn_o_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_0_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_0_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(81846976)))];
+            tensor<fp16, [1, 768, 1, 1]> var_237_cast_fp16 = conv(bias = layers_0_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_237_dilations_0, groups = var_237_groups_0, pad = var_237_pad_0, pad_type = var_237_pad_type_0, strides = var_237_strides_0, weight = layers_0_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_1_cast_fp16)[name = string("op_237_cast_fp16")];
+            string var_243_pad_type_0 = const()[name = string("op_243_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_243_strides_0 = const()[name = string("op_243_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_243_pad_0 = const()[name = string("op_243_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_243_dilations_0 = const()[name = string("op_243_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_243_groups_0 = const()[name = string("op_243_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_0_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(81857344))), nonzero_data = tensor<fp16, [4323]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(81848576))))[name = string("layers_0_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1]> var_243_cast_fp16 = conv(dilations = var_243_dilations_0, groups = var_243_groups_0, pad = var_243_pad_0, pad_type = var_243_pad_type_0, strides = var_243_strides_0, weight = layers_0_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_1_cast_fp16)[name = string("op_243_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> obj_11_cast_fp16 = add(x = var_237_cast_fp16, y = var_243_cast_fp16)[name = string("obj_11_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> inputs_3_cast_fp16 = add(x = inputs_1_cast_fp16, y = obj_11_cast_fp16)[name = string("inputs_3_cast_fp16")];
+            tensor<int32, [1]> out_3_axes_0 = const()[name = string("out_3_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_258_to_fp16 = const()[name = string("op_258_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> out_3_cast_fp16 = layer_norm(axes = out_3_axes_0, epsilon = var_258_to_fp16, x = inputs_3_cast_fp16)[name = string("out_3_cast_fp16")];
+            tensor<fp16, [768]> obj_13_gamma_0_to_fp16 = const()[name = string("obj_13_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(81931136)))];
+            tensor<fp16, [768]> obj_13_beta_0_to_fp16 = const()[name = string("obj_13_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(81932736)))];
+            fp16 obj_13_epsilon_0_to_fp16 = const()[name = string("obj_13_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> obj_13_cast_fp16 = batch_norm(beta = obj_13_beta_0_to_fp16, epsilon = obj_13_epsilon_0_to_fp16, gamma = obj_13_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_3_cast_fp16)[name = string("obj_13_cast_fp16")];
+            string var_278_pad_type_0 = const()[name = string("op_278_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_278_strides_0 = const()[name = string("op_278_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_278_pad_0 = const()[name = string("op_278_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_278_dilations_0 = const()[name = string("op_278_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_278_groups_0 = const()[name = string("op_278_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_0_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(81934336))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(82229312))))[name = string("layers_0_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_0_encoder_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_0_encoder_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(82229440)))];
+            tensor<fp16, [1, 768, 1, 1]> var_278_cast_fp16 = conv(bias = layers_0_encoder_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_278_dilations_0, groups = var_278_groups_0, pad = var_278_pad_0, pad_type = var_278_pad_type_0, strides = var_278_strides_0, weight = layers_0_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_13_cast_fp16)[name = string("op_278_cast_fp16")];
+            string var_284_pad_type_0 = const()[name = string("op_284_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_284_strides_0 = const()[name = string("op_284_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_284_pad_0 = const()[name = string("op_284_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_284_dilations_0 = const()[name = string("op_284_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_284_groups_0 = const()[name = string("op_284_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_0_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(82243008))), nonzero_data = tensor<fp16, [5932]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(82231040))))[name = string("layers_0_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1]> var_284_cast_fp16 = conv(dilations = var_284_dilations_0, groups = var_284_groups_0, pad = var_284_pad_0, pad_type = var_284_pad_type_0, strides = var_284_strides_0, weight = layers_0_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_13_cast_fp16)[name = string("op_284_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> query_3_cast_fp16 = add(x = var_278_cast_fp16, y = var_284_cast_fp16)[name = string("query_3_cast_fp16")];
+            tensor<int32, [4]> var_287 = const()[name = string("op_287"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1]> mh_q_3_cast_fp16 = reshape(shape = var_287, x = query_3_cast_fp16)[name = string("mh_q_3_cast_fp16")];
+            fp16 var_289_to_fp16 = const()[name = string("op_289_to_fp16"), val = fp16(0x1p-3)];
+            tensor<fp16, [1, 12, 64, 1]> var_290_cast_fp16 = mul(x = mh_q_3_cast_fp16, y = var_289_to_fp16)[name = string("op_290_cast_fp16")];
+            tensor<int32, [4]> var_291 = const()[name = string("op_291"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1536]> var_292_cast_fp16 = reshape(shape = var_291, x = obj_17_cast_fp16)[name = string("op_292_cast_fp16")];
+            bool mh_w_5_transpose_x_0 = const()[name = string("mh_w_5_transpose_x_0"), val = bool(true)];
+            bool mh_w_5_transpose_y_0 = const()[name = string("mh_w_5_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 12, 1, 1536]> mh_w_5_cast_fp16 = matmul(transpose_x = mh_w_5_transpose_x_0, transpose_y = mh_w_5_transpose_y_0, x = var_290_cast_fp16, y = var_292_cast_fp16)[name = string("mh_w_5_cast_fp16")];
+            tensor<fp16, [1, 1536]> read_state_4 = read_state(input = encoder_attn_key_padding_mask)[name = string("read_state_4")];
+            tensor<int32, [1]> var_296_axes_0 = const()[name = string("op_296_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1536]> var_296_cast_fp16 = expand_dims(axes = var_296_axes_0, x = read_state_4)[name = string("op_296_cast_fp16")];
+            tensor<int32, [1]> var_297_axes_0 = const()[name = string("op_297_axes_0"), val = tensor<int32, [1]>([2])];
+            tensor<fp16, [1, 1, 1, 1536]> var_297_cast_fp16 = expand_dims(axes = var_297_axes_0, x = var_296_cast_fp16)[name = string("op_297_cast_fp16")];
+            tensor<fp16, [1, 12, 1, 1536]> mh_w_7_cast_fp16 = add(x = mh_w_5_cast_fp16, y = var_297_cast_fp16)[name = string("mh_w_7_cast_fp16")];
+            tensor<fp16, [1, 12, 1, 1536]> obj_23_cast_fp16 = softmax(axis = var_114, x = mh_w_7_cast_fp16)[name = string("obj_23_cast_fp16")];
+            tensor<int32, [4]> var_301 = const()[name = string("op_301"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1536]> var_302_cast_fp16 = reshape(shape = var_301, x = obj_19_cast_fp16)[name = string("op_302_cast_fp16")];
+            bool attn_3_transpose_x_0 = const()[name = string("attn_3_transpose_x_0"), val = bool(false)];
+            bool attn_3_transpose_y_0 = const()[name = string("attn_3_transpose_y_0"), val = bool(true)];
+            tensor<fp16, [1, 12, 64, 1]> attn_3_cast_fp16 = matmul(transpose_x = attn_3_transpose_x_0, transpose_y = attn_3_transpose_y_0, x = var_302_cast_fp16, y = obj_23_cast_fp16)[name = string("attn_3_cast_fp16")];
+            tensor<int32, [4]> var_305 = const()[name = string("op_305"), val = tensor<int32, [4]>([1, 768, 1, -1])];
+            tensor<fp16, [1, 768, 1, 1]> input_3_cast_fp16 = reshape(shape = var_305, x = attn_3_cast_fp16)[name = string("input_3_cast_fp16")];
+            string var_315_pad_type_0 = const()[name = string("op_315_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_315_strides_0 = const()[name = string("op_315_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_315_pad_0 = const()[name = string("op_315_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_315_dilations_0 = const()[name = string("op_315_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_315_groups_0 = const()[name = string("op_315_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_0_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(82316800))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(82611776))))[name = string("layers_0_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_0_encoder_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_0_encoder_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(82611904)))];
+            tensor<fp16, [1, 768, 1, 1]> var_315_cast_fp16 = conv(bias = layers_0_encoder_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_315_dilations_0, groups = var_315_groups_0, pad = var_315_pad_0, pad_type = var_315_pad_type_0, strides = var_315_strides_0, weight = layers_0_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_3_cast_fp16)[name = string("op_315_cast_fp16")];
+            string var_321_pad_type_0 = const()[name = string("op_321_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_321_strides_0 = const()[name = string("op_321_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_321_pad_0 = const()[name = string("op_321_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_321_dilations_0 = const()[name = string("op_321_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_321_groups_0 = const()[name = string("op_321_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_0_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(82619520))), nonzero_data = tensor<fp16, [2967]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(82613504))))[name = string("layers_0_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1]> var_321_cast_fp16 = conv(dilations = var_321_dilations_0, groups = var_321_groups_0, pad = var_321_pad_0, pad_type = var_321_pad_type_0, strides = var_321_strides_0, weight = layers_0_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_3_cast_fp16)[name = string("op_321_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> obj_21_cast_fp16 = add(x = var_315_cast_fp16, y = var_321_cast_fp16)[name = string("obj_21_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> inputs_5_cast_fp16 = add(x = inputs_3_cast_fp16, y = obj_21_cast_fp16)[name = string("inputs_5_cast_fp16")];
+            tensor<int32, [1]> out_5_axes_0 = const()[name = string("out_5_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_332_to_fp16 = const()[name = string("op_332_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> out_5_cast_fp16 = layer_norm(axes = out_5_axes_0, epsilon = var_332_to_fp16, x = inputs_5_cast_fp16)[name = string("out_5_cast_fp16")];
+            tensor<fp16, [768]> input_5_gamma_0_to_fp16 = const()[name = string("input_5_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(82693312)))];
+            tensor<fp16, [768]> input_5_beta_0_to_fp16 = const()[name = string("input_5_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(82694912)))];
+            fp16 input_5_epsilon_0_to_fp16 = const()[name = string("input_5_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> input_5_cast_fp16 = batch_norm(beta = input_5_beta_0_to_fp16, epsilon = input_5_epsilon_0_to_fp16, gamma = input_5_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_5_cast_fp16)[name = string("input_5_cast_fp16")];
+            string var_350_pad_type_0 = const()[name = string("op_350_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_350_strides_0 = const()[name = string("op_350_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_350_pad_0 = const()[name = string("op_350_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_350_dilations_0 = const()[name = string("op_350_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_350_groups_0 = const()[name = string("op_350_groups_0"), val = int32(1)];
+            tensor<fp16, [3072, 768, 1, 1]> layers_0_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [3072, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(82696512))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(83876224))))[name = string("layers_0_fc1_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [3072]> layers_0_fc1_inlier_module_bias_to_fp16 = const()[name = string("layers_0_fc1_inlier_module_bias_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(83876352)))];
+            tensor<fp16, [1, 3072, 1, 1]> var_350_cast_fp16 = conv(bias = layers_0_fc1_inlier_module_bias_to_fp16, dilations = var_350_dilations_0, groups = var_350_groups_0, pad = var_350_pad_0, pad_type = var_350_pad_type_0, strides = var_350_strides_0, weight = layers_0_fc1_inlier_module_weight_to_fp16_palettized, x = input_5_cast_fp16)[name = string("op_350_cast_fp16")];
+            string var_356_pad_type_0 = const()[name = string("op_356_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_356_strides_0 = const()[name = string("op_356_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_356_pad_0 = const()[name = string("op_356_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_356_dilations_0 = const()[name = string("op_356_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_356_groups_0 = const()[name = string("op_356_groups_0"), val = int32(1)];
+            tensor<fp16, [3072, 768, 1, 1]> layers_0_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [3072, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(83918848))), nonzero_data = tensor<fp16, [18096]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(83882560))))[name = string("layers_0_fc1_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 3072, 1, 1]> var_356_cast_fp16 = conv(dilations = var_356_dilations_0, groups = var_356_groups_0, pad = var_356_pad_0, pad_type = var_356_pad_type_0, strides = var_356_strides_0, weight = layers_0_fc1_outlier_module_weight_to_fp16_sparsified, x = input_5_cast_fp16)[name = string("op_356_cast_fp16")];
+            tensor<fp16, [1, 3072, 1, 1]> input_7_cast_fp16 = add(x = var_350_cast_fp16, y = var_356_cast_fp16)[name = string("input_7_cast_fp16")];
+            string input_9_mode_0 = const()[name = string("input_9_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 3072, 1, 1]> input_9_cast_fp16 = gelu(mode = input_9_mode_0, x = input_7_cast_fp16)[name = string("input_9_cast_fp16")];
+            string var_367_pad_type_0 = const()[name = string("op_367_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_367_strides_0 = const()[name = string("op_367_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_367_pad_0 = const()[name = string("op_367_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_367_dilations_0 = const()[name = string("op_367_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_367_groups_0 = const()[name = string("op_367_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 3072, 1, 1]> layers_0_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(84213824))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(85393536))))[name = string("layers_0_fc2_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_0_fc2_inlier_module_bias_to_fp16 = const()[name = string("layers_0_fc2_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(85393664)))];
+            tensor<fp16, [1, 768, 1, 1]> var_367_cast_fp16 = conv(bias = layers_0_fc2_inlier_module_bias_to_fp16, dilations = var_367_dilations_0, groups = var_367_groups_0, pad = var_367_pad_0, pad_type = var_367_pad_type_0, strides = var_367_strides_0, weight = layers_0_fc2_inlier_module_weight_to_fp16_palettized, x = input_9_cast_fp16)[name = string("op_367_cast_fp16")];
+            string var_373_pad_type_0 = const()[name = string("op_373_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_373_strides_0 = const()[name = string("op_373_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_373_pad_0 = const()[name = string("op_373_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_373_dilations_0 = const()[name = string("op_373_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_373_groups_0 = const()[name = string("op_373_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 3072, 1, 1]> layers_0_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(85420224))), nonzero_data = tensor<fp16, [12421]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(85395264))))[name = string("layers_0_fc2_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1]> var_373_cast_fp16 = conv(dilations = var_373_dilations_0, groups = var_373_groups_0, pad = var_373_pad_0, pad_type = var_373_pad_type_0, strides = var_373_strides_0, weight = layers_0_fc2_outlier_module_weight_to_fp16_sparsified, x = input_9_cast_fp16)[name = string("op_373_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> hidden_states_3_cast_fp16 = add(x = var_367_cast_fp16, y = var_373_cast_fp16)[name = string("hidden_states_3_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> inputs_7_cast_fp16 = add(x = inputs_5_cast_fp16, y = hidden_states_3_cast_fp16)[name = string("inputs_7_cast_fp16")];
+            tensor<int32, [4]> obj_35_begin_0 = const()[name = string("obj_35_begin_0"), val = tensor<int32, [4]>([1, 0, 0, 0])];
+            tensor<int32, [4]> obj_35_end_0 = const()[name = string("obj_35_end_0"), val = tensor<int32, [4]>([2, 768, 1, 1536])];
+            tensor<bool, [4]> obj_35_end_mask_0 = const()[name = string("obj_35_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<fp16, [1, 768, 1, 1536]> obj_35_cast_fp16 = slice_by_index(begin = obj_35_begin_0, end = obj_35_end_0, end_mask = obj_35_end_mask_0, x = read_state_2)[name = string("obj_35_cast_fp16")];
+            tensor<int32, [4]> obj_37_begin_0 = const()[name = string("obj_37_begin_0"), val = tensor<int32, [4]>([1, 0, 0, 0])];
+            tensor<int32, [4]> obj_37_end_0 = const()[name = string("obj_37_end_0"), val = tensor<int32, [4]>([2, 768, 1, 1536])];
+            tensor<bool, [4]> obj_37_end_mask_0 = const()[name = string("obj_37_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<fp16, [1, 768, 1, 1536]> obj_37_cast_fp16 = slice_by_index(begin = obj_37_begin_0, end = obj_37_end_0, end_mask = obj_37_end_mask_0, x = read_state_3)[name = string("obj_37_cast_fp16")];
+            int32 var_395 = const()[name = string("op_395"), val = int32(3)];
+            tensor<int32, [1]> out_7_axes_0 = const()[name = string("out_7_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_420_to_fp16 = const()[name = string("op_420_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> out_7_cast_fp16 = layer_norm(axes = out_7_axes_0, epsilon = var_420_to_fp16, x = inputs_7_cast_fp16)[name = string("out_7_cast_fp16")];
+            tensor<fp16, [768]> obj_25_gamma_0_to_fp16 = const()[name = string("obj_25_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(85715200)))];
+            tensor<fp16, [768]> obj_25_beta_0_to_fp16 = const()[name = string("obj_25_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(85716800)))];
+            fp16 obj_25_epsilon_0_to_fp16 = const()[name = string("obj_25_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> obj_25_cast_fp16 = batch_norm(beta = obj_25_beta_0_to_fp16, epsilon = obj_25_epsilon_0_to_fp16, gamma = obj_25_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_7_cast_fp16)[name = string("obj_25_cast_fp16")];
+            string var_442_pad_type_0 = const()[name = string("op_442_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_442_strides_0 = const()[name = string("op_442_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_442_pad_0 = const()[name = string("op_442_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_442_dilations_0 = const()[name = string("op_442_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_442_groups_0 = const()[name = string("op_442_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_1_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(85718400))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(86013376))))[name = string("layers_1_self_attn_q_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_1_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_1_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(86013504)))];
+            tensor<fp16, [1, 768, 1, 1]> var_442_cast_fp16 = conv(bias = layers_1_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_442_dilations_0, groups = var_442_groups_0, pad = var_442_pad_0, pad_type = var_442_pad_type_0, strides = var_442_strides_0, weight = layers_1_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_25_cast_fp16)[name = string("op_442_cast_fp16")];
+            string var_448_pad_type_0 = const()[name = string("op_448_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_448_strides_0 = const()[name = string("op_448_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_448_pad_0 = const()[name = string("op_448_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_448_dilations_0 = const()[name = string("op_448_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_448_groups_0 = const()[name = string("op_448_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_1_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(86032320))), nonzero_data = tensor<fp16, [8553]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(86015104))))[name = string("layers_1_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1]> var_448_cast_fp16 = conv(dilations = var_448_dilations_0, groups = var_448_groups_0, pad = var_448_pad_0, pad_type = var_448_pad_type_0, strides = var_448_strides_0, weight = layers_1_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_25_cast_fp16)[name = string("op_448_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> query_5_cast_fp16 = add(x = var_442_cast_fp16, y = var_448_cast_fp16)[name = string("query_5_cast_fp16")];
+            string var_457_pad_type_0 = const()[name = string("op_457_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_457_strides_0 = const()[name = string("op_457_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_457_pad_0 = const()[name = string("op_457_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_457_dilations_0 = const()[name = string("op_457_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_457_groups_0 = const()[name = string("op_457_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_1_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(86106112))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(86401088))))[name = string("layers_1_self_attn_k_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1, 768, 1, 1]> var_457_cast_fp16 = conv(dilations = var_457_dilations_0, groups = var_457_groups_0, pad = var_457_pad_0, pad_type = var_457_pad_type_0, strides = var_457_strides_0, weight = layers_1_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_25_cast_fp16)[name = string("op_457_cast_fp16")];
+            string var_463_pad_type_0 = const()[name = string("op_463_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_463_strides_0 = const()[name = string("op_463_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_463_pad_0 = const()[name = string("op_463_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_463_dilations_0 = const()[name = string("op_463_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_463_groups_0 = const()[name = string("op_463_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_1_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(86415168))), nonzero_data = tensor<fp16, [6913]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(86401216))))[name = string("layers_1_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1]> var_463_cast_fp16 = conv(dilations = var_463_dilations_0, groups = var_463_groups_0, pad = var_463_pad_0, pad_type = var_463_pad_type_0, strides = var_463_strides_0, weight = layers_1_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_25_cast_fp16)[name = string("op_463_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> current_key_3_cast_fp16 = add(x = var_457_cast_fp16, y = var_463_cast_fp16)[name = string("current_key_3_cast_fp16")];
+            string var_473_pad_type_0 = const()[name = string("op_473_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_473_strides_0 = const()[name = string("op_473_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_473_pad_0 = const()[name = string("op_473_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_473_dilations_0 = const()[name = string("op_473_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_473_groups_0 = const()[name = string("op_473_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_1_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(86488960))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(86783936))))[name = string("layers_1_self_attn_v_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_1_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_1_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(86784064)))];
+            tensor<fp16, [1, 768, 1, 1]> var_473_cast_fp16 = conv(bias = layers_1_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_473_dilations_0, groups = var_473_groups_0, pad = var_473_pad_0, pad_type = var_473_pad_type_0, strides = var_473_strides_0, weight = layers_1_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_25_cast_fp16)[name = string("op_473_cast_fp16")];
+            string var_479_pad_type_0 = const()[name = string("op_479_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_479_strides_0 = const()[name = string("op_479_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_479_pad_0 = const()[name = string("op_479_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_479_dilations_0 = const()[name = string("op_479_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_479_groups_0 = const()[name = string("op_479_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_1_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(86802496))), nonzero_data = tensor<fp16, [8378]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(86785664))))[name = string("layers_1_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1]> var_479_cast_fp16 = conv(dilations = var_479_dilations_0, groups = var_479_groups_0, pad = var_479_pad_0, pad_type = var_479_pad_type_0, strides = var_479_strides_0, weight = layers_1_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_25_cast_fp16)[name = string("op_479_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> current_value_3_cast_fp16 = add(x = var_473_cast_fp16, y = var_479_cast_fp16)[name = string("current_value_3_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 448]> var_485_cast_fp16 = mul(x = current_key_3_cast_fp16, y = var_202_cast_fp16)[name = string("op_485_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 448]> key_3_cast_fp16 = add(x = var_71_cast_fp16_1, y = var_485_cast_fp16)[name = string("key_3_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 448]> var_487_cast_fp16 = mul(x = current_value_3_cast_fp16, y = var_202_cast_fp16)[name = string("op_487_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 448]> value_3_cast_fp16 = add(x = var_86_cast_fp16_1, y = var_487_cast_fp16)[name = string("value_3_cast_fp16")];
+            tensor<int32, [4]> var_490 = const()[name = string("op_490"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1]> mh_q_5_cast_fp16 = reshape(shape = var_490, x = query_5_cast_fp16)[name = string("mh_q_5_cast_fp16")];
+            fp16 var_492_to_fp16 = const()[name = string("op_492_to_fp16"), val = fp16(0x1p-3)];
+            tensor<fp16, [1, 12, 64, 1]> var_493_cast_fp16 = mul(x = mh_q_5_cast_fp16, y = var_492_to_fp16)[name = string("op_493_cast_fp16")];
+            tensor<int32, [4]> var_494 = const()[name = string("op_494"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 448]> var_495_cast_fp16 = reshape(shape = var_494, x = key_3_cast_fp16)[name = string("op_495_cast_fp16")];
+            bool mh_w_9_transpose_x_0 = const()[name = string("mh_w_9_transpose_x_0"), val = bool(true)];
+            bool mh_w_9_transpose_y_0 = const()[name = string("mh_w_9_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 12, 1, 448]> mh_w_9_cast_fp16 = matmul(transpose_x = mh_w_9_transpose_x_0, transpose_y = mh_w_9_transpose_y_0, x = var_493_cast_fp16, y = var_495_cast_fp16)[name = string("mh_w_9_cast_fp16")];
+            tensor<fp16, [1, 12, 1, 448]> mh_w_11_cast_fp16 = add(x = mh_w_9_cast_fp16, y = var_219_cast_fp16)[name = string("mh_w_11_cast_fp16")];
+            tensor<fp16, [1, 12, 1, 448]> var_503_cast_fp16 = softmax(axis = var_395, x = mh_w_11_cast_fp16)[name = string("op_503_cast_fp16")];
+            tensor<int32, [4]> var_504 = const()[name = string("op_504"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 448]> var_505_cast_fp16 = reshape(shape = var_504, x = value_3_cast_fp16)[name = string("op_505_cast_fp16")];
+            bool attn_5_transpose_x_0 = const()[name = string("attn_5_transpose_x_0"), val = bool(false)];
+            bool attn_5_transpose_y_0 = const()[name = string("attn_5_transpose_y_0"), val = bool(true)];
+            tensor<fp16, [1, 12, 64, 1]> attn_5_cast_fp16 = matmul(transpose_x = attn_5_transpose_x_0, transpose_y = attn_5_transpose_y_0, x = var_505_cast_fp16, y = var_503_cast_fp16)[name = string("attn_5_cast_fp16")];
+            tensor<int32, [4]> var_508 = const()[name = string("op_508"), val = tensor<int32, [4]>([1, 768, 1, -1])];
+            tensor<fp16, [1, 768, 1, 1]> input_11_cast_fp16 = reshape(shape = var_508, x = attn_5_cast_fp16)[name = string("input_11_cast_fp16")];
+            string var_518_pad_type_0 = const()[name = string("op_518_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_518_strides_0 = const()[name = string("op_518_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_518_pad_0 = const()[name = string("op_518_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_518_dilations_0 = const()[name = string("op_518_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_518_groups_0 = const()[name = string("op_518_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_1_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(86876288))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(87171264))))[name = string("layers_1_self_attn_o_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_1_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_1_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(87171392)))];
+            tensor<fp16, [1, 768, 1, 1]> var_518_cast_fp16 = conv(bias = layers_1_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_518_dilations_0, groups = var_518_groups_0, pad = var_518_pad_0, pad_type = var_518_pad_type_0, strides = var_518_strides_0, weight = layers_1_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_11_cast_fp16)[name = string("op_518_cast_fp16")];
+            string var_524_pad_type_0 = const()[name = string("op_524_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_524_strides_0 = const()[name = string("op_524_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_524_pad_0 = const()[name = string("op_524_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_524_dilations_0 = const()[name = string("op_524_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_524_groups_0 = const()[name = string("op_524_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_1_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(87189952))), nonzero_data = tensor<fp16, [8447]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(87172992))))[name = string("layers_1_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1]> var_524_cast_fp16 = conv(dilations = var_524_dilations_0, groups = var_524_groups_0, pad = var_524_pad_0, pad_type = var_524_pad_type_0, strides = var_524_strides_0, weight = layers_1_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_11_cast_fp16)[name = string("op_524_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> obj_31_cast_fp16 = add(x = var_518_cast_fp16, y = var_524_cast_fp16)[name = string("obj_31_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> inputs_9_cast_fp16 = add(x = inputs_7_cast_fp16, y = obj_31_cast_fp16)[name = string("inputs_9_cast_fp16")];
+            tensor<int32, [1]> out_9_axes_0 = const()[name = string("out_9_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_539_to_fp16 = const()[name = string("op_539_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> out_9_cast_fp16 = layer_norm(axes = out_9_axes_0, epsilon = var_539_to_fp16, x = inputs_9_cast_fp16)[name = string("out_9_cast_fp16")];
+            tensor<fp16, [768]> obj_33_gamma_0_to_fp16 = const()[name = string("obj_33_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(87263744)))];
+            tensor<fp16, [768]> obj_33_beta_0_to_fp16 = const()[name = string("obj_33_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(87265344)))];
+            fp16 obj_33_epsilon_0_to_fp16 = const()[name = string("obj_33_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> obj_33_cast_fp16 = batch_norm(beta = obj_33_beta_0_to_fp16, epsilon = obj_33_epsilon_0_to_fp16, gamma = obj_33_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_9_cast_fp16)[name = string("obj_33_cast_fp16")];
+            string var_559_pad_type_0 = const()[name = string("op_559_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_559_strides_0 = const()[name = string("op_559_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_559_pad_0 = const()[name = string("op_559_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_559_dilations_0 = const()[name = string("op_559_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_559_groups_0 = const()[name = string("op_559_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_1_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(87266944))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(87561920))))[name = string("layers_1_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_1_encoder_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_1_encoder_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(87562048)))];
+            tensor<fp16, [1, 768, 1, 1]> var_559_cast_fp16 = conv(bias = layers_1_encoder_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_559_dilations_0, groups = var_559_groups_0, pad = var_559_pad_0, pad_type = var_559_pad_type_0, strides = var_559_strides_0, weight = layers_1_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_33_cast_fp16)[name = string("op_559_cast_fp16")];
+            string var_565_pad_type_0 = const()[name = string("op_565_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_565_strides_0 = const()[name = string("op_565_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_565_pad_0 = const()[name = string("op_565_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_565_dilations_0 = const()[name = string("op_565_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_565_groups_0 = const()[name = string("op_565_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_1_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(87570560))), nonzero_data = tensor<fp16, [3417]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(87563648))))[name = string("layers_1_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1]> var_565_cast_fp16 = conv(dilations = var_565_dilations_0, groups = var_565_groups_0, pad = var_565_pad_0, pad_type = var_565_pad_type_0, strides = var_565_strides_0, weight = layers_1_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_33_cast_fp16)[name = string("op_565_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> query_7_cast_fp16 = add(x = var_559_cast_fp16, y = var_565_cast_fp16)[name = string("query_7_cast_fp16")];
+            tensor<int32, [4]> var_568 = const()[name = string("op_568"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1]> mh_q_7_cast_fp16 = reshape(shape = var_568, x = query_7_cast_fp16)[name = string("mh_q_7_cast_fp16")];
+            fp16 var_570_to_fp16 = const()[name = string("op_570_to_fp16"), val = fp16(0x1p-3)];
+            tensor<fp16, [1, 12, 64, 1]> var_571_cast_fp16 = mul(x = mh_q_7_cast_fp16, y = var_570_to_fp16)[name = string("op_571_cast_fp16")];
+            tensor<int32, [4]> var_572 = const()[name = string("op_572"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1536]> var_573_cast_fp16 = reshape(shape = var_572, x = obj_35_cast_fp16)[name = string("op_573_cast_fp16")];
+            bool mh_w_13_transpose_x_0 = const()[name = string("mh_w_13_transpose_x_0"), val = bool(true)];
+            bool mh_w_13_transpose_y_0 = const()[name = string("mh_w_13_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 12, 1, 1536]> mh_w_13_cast_fp16 = matmul(transpose_x = mh_w_13_transpose_x_0, transpose_y = mh_w_13_transpose_y_0, x = var_571_cast_fp16, y = var_573_cast_fp16)[name = string("mh_w_13_cast_fp16")];
+            tensor<fp16, [1, 12, 1, 1536]> mh_w_15_cast_fp16 = add(x = mh_w_13_cast_fp16, y = var_297_cast_fp16)[name = string("mh_w_15_cast_fp16")];
+            tensor<fp16, [1, 12, 1, 1536]> obj_41_cast_fp16 = softmax(axis = var_395, x = mh_w_15_cast_fp16)[name = string("obj_41_cast_fp16")];
+            tensor<int32, [4]> var_582 = const()[name = string("op_582"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1536]> var_583_cast_fp16 = reshape(shape = var_582, x = obj_37_cast_fp16)[name = string("op_583_cast_fp16")];
+            bool attn_7_transpose_x_0 = const()[name = string("attn_7_transpose_x_0"), val = bool(false)];
+            bool attn_7_transpose_y_0 = const()[name = string("attn_7_transpose_y_0"), val = bool(true)];
+            tensor<fp16, [1, 12, 64, 1]> attn_7_cast_fp16 = matmul(transpose_x = attn_7_transpose_x_0, transpose_y = attn_7_transpose_y_0, x = var_583_cast_fp16, y = obj_41_cast_fp16)[name = string("attn_7_cast_fp16")];
+            tensor<int32, [4]> var_586 = const()[name = string("op_586"), val = tensor<int32, [4]>([1, 768, 1, -1])];
+            tensor<fp16, [1, 768, 1, 1]> input_13_cast_fp16 = reshape(shape = var_586, x = attn_7_cast_fp16)[name = string("input_13_cast_fp16")];
+            string var_596_pad_type_0 = const()[name = string("op_596_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_596_strides_0 = const()[name = string("op_596_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_596_pad_0 = const()[name = string("op_596_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_596_dilations_0 = const()[name = string("op_596_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_596_groups_0 = const()[name = string("op_596_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_1_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(87644352))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(87939328))))[name = string("layers_1_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_1_encoder_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_1_encoder_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(87939456)))];
+            tensor<fp16, [1, 768, 1, 1]> var_596_cast_fp16 = conv(bias = layers_1_encoder_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_596_dilations_0, groups = var_596_groups_0, pad = var_596_pad_0, pad_type = var_596_pad_type_0, strides = var_596_strides_0, weight = layers_1_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_13_cast_fp16)[name = string("op_596_cast_fp16")];
+            string var_602_pad_type_0 = const()[name = string("op_602_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_602_strides_0 = const()[name = string("op_602_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_602_pad_0 = const()[name = string("op_602_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_602_dilations_0 = const()[name = string("op_602_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_602_groups_0 = const()[name = string("op_602_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_1_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(87947776))), nonzero_data = tensor<fp16, [3305]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(87941056))))[name = string("layers_1_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1]> var_602_cast_fp16 = conv(dilations = var_602_dilations_0, groups = var_602_groups_0, pad = var_602_pad_0, pad_type = var_602_pad_type_0, strides = var_602_strides_0, weight = layers_1_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_13_cast_fp16)[name = string("op_602_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> obj_39_cast_fp16 = add(x = var_596_cast_fp16, y = var_602_cast_fp16)[name = string("obj_39_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> inputs_11_cast_fp16 = add(x = inputs_9_cast_fp16, y = obj_39_cast_fp16)[name = string("inputs_11_cast_fp16")];
+            tensor<int32, [1]> out_11_axes_0 = const()[name = string("out_11_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_613_to_fp16 = const()[name = string("op_613_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> out_11_cast_fp16 = layer_norm(axes = out_11_axes_0, epsilon = var_613_to_fp16, x = inputs_11_cast_fp16)[name = string("out_11_cast_fp16")];
+            tensor<fp16, [768]> input_15_gamma_0_to_fp16 = const()[name = string("input_15_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(88021568)))];
+            tensor<fp16, [768]> input_15_beta_0_to_fp16 = const()[name = string("input_15_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(88023168)))];
+            fp16 input_15_epsilon_0_to_fp16 = const()[name = string("input_15_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> input_15_cast_fp16 = batch_norm(beta = input_15_beta_0_to_fp16, epsilon = input_15_epsilon_0_to_fp16, gamma = input_15_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_11_cast_fp16)[name = string("input_15_cast_fp16")];
+            string var_631_pad_type_0 = const()[name = string("op_631_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_631_strides_0 = const()[name = string("op_631_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_631_pad_0 = const()[name = string("op_631_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_631_dilations_0 = const()[name = string("op_631_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_631_groups_0 = const()[name = string("op_631_groups_0"), val = int32(1)];
+            tensor<fp16, [3072, 768, 1, 1]> layers_1_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [3072, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(88024768))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(89204480))))[name = string("layers_1_fc1_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [3072]> layers_1_fc1_inlier_module_bias_to_fp16 = const()[name = string("layers_1_fc1_inlier_module_bias_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(89204608)))];
+            tensor<fp16, [1, 3072, 1, 1]> var_631_cast_fp16 = conv(bias = layers_1_fc1_inlier_module_bias_to_fp16, dilations = var_631_dilations_0, groups = var_631_groups_0, pad = var_631_pad_0, pad_type = var_631_pad_type_0, strides = var_631_strides_0, weight = layers_1_fc1_inlier_module_weight_to_fp16_palettized, x = input_15_cast_fp16)[name = string("op_631_cast_fp16")];
+            string var_637_pad_type_0 = const()[name = string("op_637_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_637_strides_0 = const()[name = string("op_637_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_637_pad_0 = const()[name = string("op_637_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_637_dilations_0 = const()[name = string("op_637_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_637_groups_0 = const()[name = string("op_637_groups_0"), val = int32(1)];
+            tensor<fp16, [3072, 768, 1, 1]> layers_1_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [3072, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(89259392))), nonzero_data = tensor<fp16, [24235]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(89210816))))[name = string("layers_1_fc1_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 3072, 1, 1]> var_637_cast_fp16 = conv(dilations = var_637_dilations_0, groups = var_637_groups_0, pad = var_637_pad_0, pad_type = var_637_pad_type_0, strides = var_637_strides_0, weight = layers_1_fc1_outlier_module_weight_to_fp16_sparsified, x = input_15_cast_fp16)[name = string("op_637_cast_fp16")];
+            tensor<fp16, [1, 3072, 1, 1]> input_17_cast_fp16 = add(x = var_631_cast_fp16, y = var_637_cast_fp16)[name = string("input_17_cast_fp16")];
+            string input_19_mode_0 = const()[name = string("input_19_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 3072, 1, 1]> input_19_cast_fp16 = gelu(mode = input_19_mode_0, x = input_17_cast_fp16)[name = string("input_19_cast_fp16")];
+            string var_648_pad_type_0 = const()[name = string("op_648_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_648_strides_0 = const()[name = string("op_648_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_648_pad_0 = const()[name = string("op_648_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_648_dilations_0 = const()[name = string("op_648_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_648_groups_0 = const()[name = string("op_648_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 3072, 1, 1]> layers_1_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(89554368))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(90734080))))[name = string("layers_1_fc2_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_1_fc2_inlier_module_bias_to_fp16 = const()[name = string("layers_1_fc2_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(90734208)))];
+            tensor<fp16, [1, 768, 1, 1]> var_648_cast_fp16 = conv(bias = layers_1_fc2_inlier_module_bias_to_fp16, dilations = var_648_dilations_0, groups = var_648_groups_0, pad = var_648_pad_0, pad_type = var_648_pad_type_0, strides = var_648_strides_0, weight = layers_1_fc2_inlier_module_weight_to_fp16_palettized, x = input_19_cast_fp16)[name = string("op_648_cast_fp16")];
+            string var_654_pad_type_0 = const()[name = string("op_654_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_654_strides_0 = const()[name = string("op_654_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_654_pad_0 = const()[name = string("op_654_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_654_dilations_0 = const()[name = string("op_654_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_654_groups_0 = const()[name = string("op_654_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 3072, 1, 1]> layers_1_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(90768320))), nonzero_data = tensor<fp16, [16198]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(90735808))))[name = string("layers_1_fc2_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1]> var_654_cast_fp16 = conv(dilations = var_654_dilations_0, groups = var_654_groups_0, pad = var_654_pad_0, pad_type = var_654_pad_type_0, strides = var_654_strides_0, weight = layers_1_fc2_outlier_module_weight_to_fp16_sparsified, x = input_19_cast_fp16)[name = string("op_654_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> hidden_states_5_cast_fp16 = add(x = var_648_cast_fp16, y = var_654_cast_fp16)[name = string("hidden_states_5_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> inputs_13_cast_fp16 = add(x = inputs_11_cast_fp16, y = hidden_states_5_cast_fp16)[name = string("inputs_13_cast_fp16")];
+            tensor<int32, [4]> obj_53_begin_0 = const()[name = string("obj_53_begin_0"), val = tensor<int32, [4]>([2, 0, 0, 0])];
+            tensor<int32, [4]> obj_53_end_0 = const()[name = string("obj_53_end_0"), val = tensor<int32, [4]>([3, 768, 1, 1536])];
+            tensor<bool, [4]> obj_53_end_mask_0 = const()[name = string("obj_53_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<fp16, [1, 768, 1, 1536]> obj_53_cast_fp16 = slice_by_index(begin = obj_53_begin_0, end = obj_53_end_0, end_mask = obj_53_end_mask_0, x = read_state_2)[name = string("obj_53_cast_fp16")];
+            tensor<int32, [4]> obj_55_begin_0 = const()[name = string("obj_55_begin_0"), val = tensor<int32, [4]>([2, 0, 0, 0])];
+            tensor<int32, [4]> obj_55_end_0 = const()[name = string("obj_55_end_0"), val = tensor<int32, [4]>([3, 768, 1, 1536])];
+            tensor<bool, [4]> obj_55_end_mask_0 = const()[name = string("obj_55_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<fp16, [1, 768, 1, 1536]> obj_55_cast_fp16 = slice_by_index(begin = obj_55_begin_0, end = obj_55_end_0, end_mask = obj_55_end_mask_0, x = read_state_3)[name = string("obj_55_cast_fp16")];
+            int32 var_676 = const()[name = string("op_676"), val = int32(3)];
+            tensor<int32, [1]> out_13_axes_0 = const()[name = string("out_13_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_701_to_fp16 = const()[name = string("op_701_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> out_13_cast_fp16 = layer_norm(axes = out_13_axes_0, epsilon = var_701_to_fp16, x = inputs_13_cast_fp16)[name = string("out_13_cast_fp16")];
+            tensor<fp16, [768]> obj_43_gamma_0_to_fp16 = const()[name = string("obj_43_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(91063296)))];
+            tensor<fp16, [768]> obj_43_beta_0_to_fp16 = const()[name = string("obj_43_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(91064896)))];
+            fp16 obj_43_epsilon_0_to_fp16 = const()[name = string("obj_43_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> obj_43_cast_fp16 = batch_norm(beta = obj_43_beta_0_to_fp16, epsilon = obj_43_epsilon_0_to_fp16, gamma = obj_43_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_13_cast_fp16)[name = string("obj_43_cast_fp16")];
+            string var_723_pad_type_0 = const()[name = string("op_723_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_723_strides_0 = const()[name = string("op_723_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_723_pad_0 = const()[name = string("op_723_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_723_dilations_0 = const()[name = string("op_723_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_723_groups_0 = const()[name = string("op_723_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_2_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(91066496))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(91361472))))[name = string("layers_2_self_attn_q_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_2_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_2_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(91361600)))];
+            tensor<fp16, [1, 768, 1, 1]> var_723_cast_fp16 = conv(bias = layers_2_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_723_dilations_0, groups = var_723_groups_0, pad = var_723_pad_0, pad_type = var_723_pad_type_0, strides = var_723_strides_0, weight = layers_2_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_43_cast_fp16)[name = string("op_723_cast_fp16")];
+            string var_729_pad_type_0 = const()[name = string("op_729_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_729_strides_0 = const()[name = string("op_729_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_729_pad_0 = const()[name = string("op_729_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_729_dilations_0 = const()[name = string("op_729_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_729_groups_0 = const()[name = string("op_729_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_2_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(91390144))), nonzero_data = tensor<fp16, [13427]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(91363200))))[name = string("layers_2_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1]> var_729_cast_fp16 = conv(dilations = var_729_dilations_0, groups = var_729_groups_0, pad = var_729_pad_0, pad_type = var_729_pad_type_0, strides = var_729_strides_0, weight = layers_2_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_43_cast_fp16)[name = string("op_729_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> query_9_cast_fp16 = add(x = var_723_cast_fp16, y = var_729_cast_fp16)[name = string("query_9_cast_fp16")];
+            string var_738_pad_type_0 = const()[name = string("op_738_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_738_strides_0 = const()[name = string("op_738_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_738_pad_0 = const()[name = string("op_738_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_738_dilations_0 = const()[name = string("op_738_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_738_groups_0 = const()[name = string("op_738_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_2_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(91463936))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(91758912))))[name = string("layers_2_self_attn_k_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1, 768, 1, 1]> var_738_cast_fp16 = conv(dilations = var_738_dilations_0, groups = var_738_groups_0, pad = var_738_pad_0, pad_type = var_738_pad_type_0, strides = var_738_strides_0, weight = layers_2_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_43_cast_fp16)[name = string("op_738_cast_fp16")];
+            string var_744_pad_type_0 = const()[name = string("op_744_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_744_strides_0 = const()[name = string("op_744_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_744_pad_0 = const()[name = string("op_744_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_744_dilations_0 = const()[name = string("op_744_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_744_groups_0 = const()[name = string("op_744_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_2_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(91783680))), nonzero_data = tensor<fp16, [12281]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(91759040))))[name = string("layers_2_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1]> var_744_cast_fp16 = conv(dilations = var_744_dilations_0, groups = var_744_groups_0, pad = var_744_pad_0, pad_type = var_744_pad_type_0, strides = var_744_strides_0, weight = layers_2_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_43_cast_fp16)[name = string("op_744_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> current_key_5_cast_fp16 = add(x = var_738_cast_fp16, y = var_744_cast_fp16)[name = string("current_key_5_cast_fp16")];
+            string var_754_pad_type_0 = const()[name = string("op_754_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_754_strides_0 = const()[name = string("op_754_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_754_pad_0 = const()[name = string("op_754_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_754_dilations_0 = const()[name = string("op_754_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_754_groups_0 = const()[name = string("op_754_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_2_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(91857472))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(92152448))))[name = string("layers_2_self_attn_v_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_2_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_2_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(92152576)))];
+            tensor<fp16, [1, 768, 1, 1]> var_754_cast_fp16 = conv(bias = layers_2_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_754_dilations_0, groups = var_754_groups_0, pad = var_754_pad_0, pad_type = var_754_pad_type_0, strides = var_754_strides_0, weight = layers_2_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_43_cast_fp16)[name = string("op_754_cast_fp16")];
+            string var_760_pad_type_0 = const()[name = string("op_760_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_760_strides_0 = const()[name = string("op_760_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_760_pad_0 = const()[name = string("op_760_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_760_dilations_0 = const()[name = string("op_760_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_760_groups_0 = const()[name = string("op_760_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_2_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(92186432))), nonzero_data = tensor<fp16, [16072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(92154176))))[name = string("layers_2_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1]> var_760_cast_fp16 = conv(dilations = var_760_dilations_0, groups = var_760_groups_0, pad = var_760_pad_0, pad_type = var_760_pad_type_0, strides = var_760_strides_0, weight = layers_2_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_43_cast_fp16)[name = string("op_760_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> current_value_5_cast_fp16 = add(x = var_754_cast_fp16, y = var_760_cast_fp16)[name = string("current_value_5_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 448]> var_766_cast_fp16 = mul(x = current_key_5_cast_fp16, y = var_202_cast_fp16)[name = string("op_766_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 448]> key_5_cast_fp16 = add(x = var_71_cast_fp16_2, y = var_766_cast_fp16)[name = string("key_5_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 448]> var_768_cast_fp16 = mul(x = current_value_5_cast_fp16, y = var_202_cast_fp16)[name = string("op_768_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 448]> value_5_cast_fp16 = add(x = var_86_cast_fp16_2, y = var_768_cast_fp16)[name = string("value_5_cast_fp16")];
+            tensor<int32, [4]> var_771 = const()[name = string("op_771"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1]> mh_q_9_cast_fp16 = reshape(shape = var_771, x = query_9_cast_fp16)[name = string("mh_q_9_cast_fp16")];
+            fp16 var_773_to_fp16 = const()[name = string("op_773_to_fp16"), val = fp16(0x1p-3)];
+            tensor<fp16, [1, 12, 64, 1]> var_774_cast_fp16 = mul(x = mh_q_9_cast_fp16, y = var_773_to_fp16)[name = string("op_774_cast_fp16")];
+            tensor<int32, [4]> var_775 = const()[name = string("op_775"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 448]> var_776_cast_fp16 = reshape(shape = var_775, x = key_5_cast_fp16)[name = string("op_776_cast_fp16")];
+            bool mh_w_17_transpose_x_0 = const()[name = string("mh_w_17_transpose_x_0"), val = bool(true)];
+            bool mh_w_17_transpose_y_0 = const()[name = string("mh_w_17_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 12, 1, 448]> mh_w_17_cast_fp16 = matmul(transpose_x = mh_w_17_transpose_x_0, transpose_y = mh_w_17_transpose_y_0, x = var_774_cast_fp16, y = var_776_cast_fp16)[name = string("mh_w_17_cast_fp16")];
+            tensor<fp16, [1, 12, 1, 448]> mh_w_19_cast_fp16 = add(x = mh_w_17_cast_fp16, y = var_219_cast_fp16)[name = string("mh_w_19_cast_fp16")];
+            tensor<fp16, [1, 12, 1, 448]> var_784_cast_fp16 = softmax(axis = var_676, x = mh_w_19_cast_fp16)[name = string("op_784_cast_fp16")];
+            tensor<int32, [4]> var_785 = const()[name = string("op_785"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 448]> var_786_cast_fp16 = reshape(shape = var_785, x = value_5_cast_fp16)[name = string("op_786_cast_fp16")];
+            bool attn_9_transpose_x_0 = const()[name = string("attn_9_transpose_x_0"), val = bool(false)];
+            bool attn_9_transpose_y_0 = const()[name = string("attn_9_transpose_y_0"), val = bool(true)];
+            tensor<fp16, [1, 12, 64, 1]> attn_9_cast_fp16 = matmul(transpose_x = attn_9_transpose_x_0, transpose_y = attn_9_transpose_y_0, x = var_786_cast_fp16, y = var_784_cast_fp16)[name = string("attn_9_cast_fp16")];
+            tensor<int32, [4]> var_789 = const()[name = string("op_789"), val = tensor<int32, [4]>([1, 768, 1, -1])];
+            tensor<fp16, [1, 768, 1, 1]> input_21_cast_fp16 = reshape(shape = var_789, x = attn_9_cast_fp16)[name = string("input_21_cast_fp16")];
+            string var_799_pad_type_0 = const()[name = string("op_799_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_799_strides_0 = const()[name = string("op_799_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_799_pad_0 = const()[name = string("op_799_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_799_dilations_0 = const()[name = string("op_799_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_799_groups_0 = const()[name = string("op_799_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_2_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(92260224))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(92555200))))[name = string("layers_2_self_attn_o_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_2_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_2_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(92555328)))];
+            tensor<fp16, [1, 768, 1, 1]> var_799_cast_fp16 = conv(bias = layers_2_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_799_dilations_0, groups = var_799_groups_0, pad = var_799_pad_0, pad_type = var_799_pad_type_0, strides = var_799_strides_0, weight = layers_2_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_21_cast_fp16)[name = string("op_799_cast_fp16")];
+            string var_805_pad_type_0 = const()[name = string("op_805_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_805_strides_0 = const()[name = string("op_805_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_805_pad_0 = const()[name = string("op_805_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_805_dilations_0 = const()[name = string("op_805_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_805_groups_0 = const()[name = string("op_805_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_2_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(92581120))), nonzero_data = tensor<fp16, [12036]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(92556928))))[name = string("layers_2_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1]> var_805_cast_fp16 = conv(dilations = var_805_dilations_0, groups = var_805_groups_0, pad = var_805_pad_0, pad_type = var_805_pad_type_0, strides = var_805_strides_0, weight = layers_2_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_21_cast_fp16)[name = string("op_805_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> obj_49_cast_fp16 = add(x = var_799_cast_fp16, y = var_805_cast_fp16)[name = string("obj_49_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> inputs_15_cast_fp16 = add(x = inputs_13_cast_fp16, y = obj_49_cast_fp16)[name = string("inputs_15_cast_fp16")];
+            tensor<int32, [1]> out_15_axes_0 = const()[name = string("out_15_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_820_to_fp16 = const()[name = string("op_820_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> out_15_cast_fp16 = layer_norm(axes = out_15_axes_0, epsilon = var_820_to_fp16, x = inputs_15_cast_fp16)[name = string("out_15_cast_fp16")];
+            tensor<fp16, [768]> obj_51_gamma_0_to_fp16 = const()[name = string("obj_51_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(92654912)))];
+            tensor<fp16, [768]> obj_51_beta_0_to_fp16 = const()[name = string("obj_51_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(92656512)))];
+            fp16 obj_51_epsilon_0_to_fp16 = const()[name = string("obj_51_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> obj_51_cast_fp16 = batch_norm(beta = obj_51_beta_0_to_fp16, epsilon = obj_51_epsilon_0_to_fp16, gamma = obj_51_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_15_cast_fp16)[name = string("obj_51_cast_fp16")];
+            string var_840_pad_type_0 = const()[name = string("op_840_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_840_strides_0 = const()[name = string("op_840_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_840_pad_0 = const()[name = string("op_840_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_840_dilations_0 = const()[name = string("op_840_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_840_groups_0 = const()[name = string("op_840_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_2_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(92658112))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(92953088))))[name = string("layers_2_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_2_encoder_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_2_encoder_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(92953216)))];
+            tensor<fp16, [1, 768, 1, 1]> var_840_cast_fp16 = conv(bias = layers_2_encoder_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_840_dilations_0, groups = var_840_groups_0, pad = var_840_pad_0, pad_type = var_840_pad_type_0, strides = var_840_strides_0, weight = layers_2_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_51_cast_fp16)[name = string("op_840_cast_fp16")];
+            string var_846_pad_type_0 = const()[name = string("op_846_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_846_strides_0 = const()[name = string("op_846_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_846_pad_0 = const()[name = string("op_846_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_846_dilations_0 = const()[name = string("op_846_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_846_groups_0 = const()[name = string("op_846_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_2_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(92965312))), nonzero_data = tensor<fp16, [5190]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(92954816))))[name = string("layers_2_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1]> var_846_cast_fp16 = conv(dilations = var_846_dilations_0, groups = var_846_groups_0, pad = var_846_pad_0, pad_type = var_846_pad_type_0, strides = var_846_strides_0, weight = layers_2_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_51_cast_fp16)[name = string("op_846_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> query_11_cast_fp16 = add(x = var_840_cast_fp16, y = var_846_cast_fp16)[name = string("query_11_cast_fp16")];
+            tensor<int32, [4]> var_849 = const()[name = string("op_849"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1]> mh_q_11_cast_fp16 = reshape(shape = var_849, x = query_11_cast_fp16)[name = string("mh_q_11_cast_fp16")];
+            fp16 var_851_to_fp16 = const()[name = string("op_851_to_fp16"), val = fp16(0x1p-3)];
+            tensor<fp16, [1, 12, 64, 1]> var_852_cast_fp16 = mul(x = mh_q_11_cast_fp16, y = var_851_to_fp16)[name = string("op_852_cast_fp16")];
+            tensor<int32, [4]> var_853 = const()[name = string("op_853"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1536]> var_854_cast_fp16 = reshape(shape = var_853, x = obj_53_cast_fp16)[name = string("op_854_cast_fp16")];
+            bool mh_w_21_transpose_x_0 = const()[name = string("mh_w_21_transpose_x_0"), val = bool(true)];
+            bool mh_w_21_transpose_y_0 = const()[name = string("mh_w_21_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 12, 1, 1536]> mh_w_21_cast_fp16 = matmul(transpose_x = mh_w_21_transpose_x_0, transpose_y = mh_w_21_transpose_y_0, x = var_852_cast_fp16, y = var_854_cast_fp16)[name = string("mh_w_21_cast_fp16")];
+            tensor<fp16, [1, 12, 1, 1536]> mh_w_23_cast_fp16 = add(x = mh_w_21_cast_fp16, y = var_297_cast_fp16)[name = string("mh_w_23_cast_fp16")];
+            tensor<fp16, [1, 12, 1, 1536]> obj_59_cast_fp16 = softmax(axis = var_676, x = mh_w_23_cast_fp16)[name = string("obj_59_cast_fp16")];
+            tensor<int32, [4]> var_863 = const()[name = string("op_863"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1536]> var_864_cast_fp16 = reshape(shape = var_863, x = obj_55_cast_fp16)[name = string("op_864_cast_fp16")];
+            bool attn_11_transpose_x_0 = const()[name = string("attn_11_transpose_x_0"), val = bool(false)];
+            bool attn_11_transpose_y_0 = const()[name = string("attn_11_transpose_y_0"), val = bool(true)];
+            tensor<fp16, [1, 12, 64, 1]> attn_11_cast_fp16 = matmul(transpose_x = attn_11_transpose_x_0, transpose_y = attn_11_transpose_y_0, x = var_864_cast_fp16, y = obj_59_cast_fp16)[name = string("attn_11_cast_fp16")];
+            tensor<int32, [4]> var_867 = const()[name = string("op_867"), val = tensor<int32, [4]>([1, 768, 1, -1])];
+            tensor<fp16, [1, 768, 1, 1]> input_23_cast_fp16 = reshape(shape = var_867, x = attn_11_cast_fp16)[name = string("input_23_cast_fp16")];
+            string var_877_pad_type_0 = const()[name = string("op_877_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_877_strides_0 = const()[name = string("op_877_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_877_pad_0 = const()[name = string("op_877_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_877_dilations_0 = const()[name = string("op_877_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_877_groups_0 = const()[name = string("op_877_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_2_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(93039104))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(93334080))))[name = string("layers_2_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_2_encoder_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_2_encoder_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(93334208)))];
+            tensor<fp16, [1, 768, 1, 1]> var_877_cast_fp16 = conv(bias = layers_2_encoder_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_877_dilations_0, groups = var_877_groups_0, pad = var_877_pad_0, pad_type = var_877_pad_type_0, strides = var_877_strides_0, weight = layers_2_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_23_cast_fp16)[name = string("op_877_cast_fp16")];
+            string var_883_pad_type_0 = const()[name = string("op_883_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_883_strides_0 = const()[name = string("op_883_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_883_pad_0 = const()[name = string("op_883_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_883_dilations_0 = const()[name = string("op_883_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_883_groups_0 = const()[name = string("op_883_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_2_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(93346048))), nonzero_data = tensor<fp16, [5066]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(93335808))))[name = string("layers_2_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1]> var_883_cast_fp16 = conv(dilations = var_883_dilations_0, groups = var_883_groups_0, pad = var_883_pad_0, pad_type = var_883_pad_type_0, strides = var_883_strides_0, weight = layers_2_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_23_cast_fp16)[name = string("op_883_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> obj_57_cast_fp16 = add(x = var_877_cast_fp16, y = var_883_cast_fp16)[name = string("obj_57_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> inputs_17_cast_fp16 = add(x = inputs_15_cast_fp16, y = obj_57_cast_fp16)[name = string("inputs_17_cast_fp16")];
+            tensor<int32, [1]> out_17_axes_0 = const()[name = string("out_17_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_894_to_fp16 = const()[name = string("op_894_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> out_17_cast_fp16 = layer_norm(axes = out_17_axes_0, epsilon = var_894_to_fp16, x = inputs_17_cast_fp16)[name = string("out_17_cast_fp16")];
+            tensor<fp16, [768]> input_25_gamma_0_to_fp16 = const()[name = string("input_25_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(93419840)))];
+            tensor<fp16, [768]> input_25_beta_0_to_fp16 = const()[name = string("input_25_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(93421440)))];
+            fp16 input_25_epsilon_0_to_fp16 = const()[name = string("input_25_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> input_25_cast_fp16 = batch_norm(beta = input_25_beta_0_to_fp16, epsilon = input_25_epsilon_0_to_fp16, gamma = input_25_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_17_cast_fp16)[name = string("input_25_cast_fp16")];
+            string var_912_pad_type_0 = const()[name = string("op_912_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_912_strides_0 = const()[name = string("op_912_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_912_pad_0 = const()[name = string("op_912_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_912_dilations_0 = const()[name = string("op_912_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_912_groups_0 = const()[name = string("op_912_groups_0"), val = int32(1)];
+            tensor<fp16, [3072, 768, 1, 1]> layers_2_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [3072, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(93423040))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(94602752))))[name = string("layers_2_fc1_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [3072]> layers_2_fc1_inlier_module_bias_to_fp16 = const()[name = string("layers_2_fc1_inlier_module_bias_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(94602880)))];
+            tensor<fp16, [1, 3072, 1, 1]> var_912_cast_fp16 = conv(bias = layers_2_fc1_inlier_module_bias_to_fp16, dilations = var_912_dilations_0, groups = var_912_groups_0, pad = var_912_pad_0, pad_type = var_912_pad_type_0, strides = var_912_strides_0, weight = layers_2_fc1_inlier_module_weight_to_fp16_palettized, x = input_25_cast_fp16)[name = string("op_912_cast_fp16")];
+            string var_918_pad_type_0 = const()[name = string("op_918_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_918_strides_0 = const()[name = string("op_918_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_918_pad_0 = const()[name = string("op_918_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_918_dilations_0 = const()[name = string("op_918_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_918_groups_0 = const()[name = string("op_918_groups_0"), val = int32(1)];
+            tensor<fp16, [3072, 768, 1, 1]> layers_2_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [3072, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(94663168))), nonzero_data = tensor<fp16, [26997]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(94609088))))[name = string("layers_2_fc1_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 3072, 1, 1]> var_918_cast_fp16 = conv(dilations = var_918_dilations_0, groups = var_918_groups_0, pad = var_918_pad_0, pad_type = var_918_pad_type_0, strides = var_918_strides_0, weight = layers_2_fc1_outlier_module_weight_to_fp16_sparsified, x = input_25_cast_fp16)[name = string("op_918_cast_fp16")];
+            tensor<fp16, [1, 3072, 1, 1]> input_27_cast_fp16 = add(x = var_912_cast_fp16, y = var_918_cast_fp16)[name = string("input_27_cast_fp16")];
+            string input_29_mode_0 = const()[name = string("input_29_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 3072, 1, 1]> input_29_cast_fp16 = gelu(mode = input_29_mode_0, x = input_27_cast_fp16)[name = string("input_29_cast_fp16")];
+            string var_929_pad_type_0 = const()[name = string("op_929_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_929_strides_0 = const()[name = string("op_929_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_929_pad_0 = const()[name = string("op_929_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_929_dilations_0 = const()[name = string("op_929_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_929_groups_0 = const()[name = string("op_929_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 3072, 1, 1]> layers_2_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(94958144))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(96137856))))[name = string("layers_2_fc2_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_2_fc2_inlier_module_bias_to_fp16 = const()[name = string("layers_2_fc2_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(96137984)))];
+            tensor<fp16, [1, 768, 1, 1]> var_929_cast_fp16 = conv(bias = layers_2_fc2_inlier_module_bias_to_fp16, dilations = var_929_dilations_0, groups = var_929_groups_0, pad = var_929_pad_0, pad_type = var_929_pad_type_0, strides = var_929_strides_0, weight = layers_2_fc2_inlier_module_weight_to_fp16_palettized, x = input_29_cast_fp16)[name = string("op_929_cast_fp16")];
+            string var_935_pad_type_0 = const()[name = string("op_935_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_935_strides_0 = const()[name = string("op_935_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_935_pad_0 = const()[name = string("op_935_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_935_dilations_0 = const()[name = string("op_935_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_935_groups_0 = const()[name = string("op_935_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 3072, 1, 1]> layers_2_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(96169152))), nonzero_data = tensor<fp16, [14724]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(96139584))))[name = string("layers_2_fc2_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1]> var_935_cast_fp16 = conv(dilations = var_935_dilations_0, groups = var_935_groups_0, pad = var_935_pad_0, pad_type = var_935_pad_type_0, strides = var_935_strides_0, weight = layers_2_fc2_outlier_module_weight_to_fp16_sparsified, x = input_29_cast_fp16)[name = string("op_935_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> hidden_states_7_cast_fp16 = add(x = var_929_cast_fp16, y = var_935_cast_fp16)[name = string("hidden_states_7_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> inputs_19_cast_fp16 = add(x = inputs_17_cast_fp16, y = hidden_states_7_cast_fp16)[name = string("inputs_19_cast_fp16")];
+            tensor<int32, [4]> obj_71_begin_0 = const()[name = string("obj_71_begin_0"), val = tensor<int32, [4]>([3, 0, 0, 0])];
+            tensor<int32, [4]> obj_71_end_0 = const()[name = string("obj_71_end_0"), val = tensor<int32, [4]>([4, 768, 1, 1536])];
+            tensor<bool, [4]> obj_71_end_mask_0 = const()[name = string("obj_71_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<fp16, [1, 768, 1, 1536]> obj_71_cast_fp16 = slice_by_index(begin = obj_71_begin_0, end = obj_71_end_0, end_mask = obj_71_end_mask_0, x = read_state_2)[name = string("obj_71_cast_fp16")];
+            tensor<int32, [4]> obj_73_begin_0 = const()[name = string("obj_73_begin_0"), val = tensor<int32, [4]>([3, 0, 0, 0])];
+            tensor<int32, [4]> obj_73_end_0 = const()[name = string("obj_73_end_0"), val = tensor<int32, [4]>([4, 768, 1, 1536])];
+            tensor<bool, [4]> obj_73_end_mask_0 = const()[name = string("obj_73_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<fp16, [1, 768, 1, 1536]> obj_73_cast_fp16 = slice_by_index(begin = obj_73_begin_0, end = obj_73_end_0, end_mask = obj_73_end_mask_0, x = read_state_3)[name = string("obj_73_cast_fp16")];
+            int32 var_957 = const()[name = string("op_957"), val = int32(3)];
+            tensor<int32, [1]> out_19_axes_0 = const()[name = string("out_19_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_982_to_fp16 = const()[name = string("op_982_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> out_19_cast_fp16 = layer_norm(axes = out_19_axes_0, epsilon = var_982_to_fp16, x = inputs_19_cast_fp16)[name = string("out_19_cast_fp16")];
+            tensor<fp16, [768]> obj_61_gamma_0_to_fp16 = const()[name = string("obj_61_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(96464128)))];
+            tensor<fp16, [768]> obj_61_beta_0_to_fp16 = const()[name = string("obj_61_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(96465728)))];
+            fp16 obj_61_epsilon_0_to_fp16 = const()[name = string("obj_61_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> obj_61_cast_fp16 = batch_norm(beta = obj_61_beta_0_to_fp16, epsilon = obj_61_epsilon_0_to_fp16, gamma = obj_61_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_19_cast_fp16)[name = string("obj_61_cast_fp16")];
+            string var_1004_pad_type_0 = const()[name = string("op_1004_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1004_strides_0 = const()[name = string("op_1004_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1004_pad_0 = const()[name = string("op_1004_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1004_dilations_0 = const()[name = string("op_1004_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1004_groups_0 = const()[name = string("op_1004_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_3_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(96467328))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(96762304))))[name = string("layers_3_self_attn_q_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_3_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_3_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(96762432)))];
+            tensor<fp16, [1, 768, 1, 1]> var_1004_cast_fp16 = conv(bias = layers_3_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_1004_dilations_0, groups = var_1004_groups_0, pad = var_1004_pad_0, pad_type = var_1004_pad_type_0, strides = var_1004_strides_0, weight = layers_3_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_61_cast_fp16)[name = string("op_1004_cast_fp16")];
+            string var_1010_pad_type_0 = const()[name = string("op_1010_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1010_strides_0 = const()[name = string("op_1010_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1010_pad_0 = const()[name = string("op_1010_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1010_dilations_0 = const()[name = string("op_1010_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1010_groups_0 = const()[name = string("op_1010_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_3_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(96778752))), nonzero_data = tensor<fp16, [7317]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(96764032))))[name = string("layers_3_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1]> var_1010_cast_fp16 = conv(dilations = var_1010_dilations_0, groups = var_1010_groups_0, pad = var_1010_pad_0, pad_type = var_1010_pad_type_0, strides = var_1010_strides_0, weight = layers_3_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_61_cast_fp16)[name = string("op_1010_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> query_13_cast_fp16 = add(x = var_1004_cast_fp16, y = var_1010_cast_fp16)[name = string("query_13_cast_fp16")];
+            string var_1019_pad_type_0 = const()[name = string("op_1019_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1019_strides_0 = const()[name = string("op_1019_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1019_pad_0 = const()[name = string("op_1019_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1019_dilations_0 = const()[name = string("op_1019_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1019_groups_0 = const()[name = string("op_1019_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_3_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(96852544))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(97147520))))[name = string("layers_3_self_attn_k_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1, 768, 1, 1]> var_1019_cast_fp16 = conv(dilations = var_1019_dilations_0, groups = var_1019_groups_0, pad = var_1019_pad_0, pad_type = var_1019_pad_type_0, strides = var_1019_strides_0, weight = layers_3_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_61_cast_fp16)[name = string("op_1019_cast_fp16")];
+            string var_1025_pad_type_0 = const()[name = string("op_1025_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1025_strides_0 = const()[name = string("op_1025_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1025_pad_0 = const()[name = string("op_1025_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1025_dilations_0 = const()[name = string("op_1025_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1025_groups_0 = const()[name = string("op_1025_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_3_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(97163456))), nonzero_data = tensor<fp16, [7842]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(97147648))))[name = string("layers_3_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1]> var_1025_cast_fp16 = conv(dilations = var_1025_dilations_0, groups = var_1025_groups_0, pad = var_1025_pad_0, pad_type = var_1025_pad_type_0, strides = var_1025_strides_0, weight = layers_3_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_61_cast_fp16)[name = string("op_1025_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> current_key_7_cast_fp16 = add(x = var_1019_cast_fp16, y = var_1025_cast_fp16)[name = string("current_key_7_cast_fp16")];
+            string var_1035_pad_type_0 = const()[name = string("op_1035_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1035_strides_0 = const()[name = string("op_1035_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1035_pad_0 = const()[name = string("op_1035_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1035_dilations_0 = const()[name = string("op_1035_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1035_groups_0 = const()[name = string("op_1035_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_3_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(97237248))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(97532224))))[name = string("layers_3_self_attn_v_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_3_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_3_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(97532352)))];
+            tensor<fp16, [1, 768, 1, 1]> var_1035_cast_fp16 = conv(bias = layers_3_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_1035_dilations_0, groups = var_1035_groups_0, pad = var_1035_pad_0, pad_type = var_1035_pad_type_0, strides = var_1035_strides_0, weight = layers_3_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_61_cast_fp16)[name = string("op_1035_cast_fp16")];
+            string var_1041_pad_type_0 = const()[name = string("op_1041_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1041_strides_0 = const()[name = string("op_1041_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1041_pad_0 = const()[name = string("op_1041_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1041_dilations_0 = const()[name = string("op_1041_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1041_groups_0 = const()[name = string("op_1041_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_3_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(97541824))), nonzero_data = tensor<fp16, [3898]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(97533952))))[name = string("layers_3_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1]> var_1041_cast_fp16 = conv(dilations = var_1041_dilations_0, groups = var_1041_groups_0, pad = var_1041_pad_0, pad_type = var_1041_pad_type_0, strides = var_1041_strides_0, weight = layers_3_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_61_cast_fp16)[name = string("op_1041_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> current_value_7_cast_fp16 = add(x = var_1035_cast_fp16, y = var_1041_cast_fp16)[name = string("current_value_7_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 448]> var_1047_cast_fp16 = mul(x = current_key_7_cast_fp16, y = var_202_cast_fp16)[name = string("op_1047_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 448]> key_7_cast_fp16 = add(x = var_71_cast_fp16_3, y = var_1047_cast_fp16)[name = string("key_7_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 448]> var_1049_cast_fp16 = mul(x = current_value_7_cast_fp16, y = var_202_cast_fp16)[name = string("op_1049_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 448]> value_7_cast_fp16 = add(x = var_86_cast_fp16_3, y = var_1049_cast_fp16)[name = string("value_7_cast_fp16")];
+            tensor<int32, [4]> var_1052 = const()[name = string("op_1052"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1]> mh_q_13_cast_fp16 = reshape(shape = var_1052, x = query_13_cast_fp16)[name = string("mh_q_13_cast_fp16")];
+            fp16 var_1054_to_fp16 = const()[name = string("op_1054_to_fp16"), val = fp16(0x1p-3)];
+            tensor<fp16, [1, 12, 64, 1]> var_1055_cast_fp16 = mul(x = mh_q_13_cast_fp16, y = var_1054_to_fp16)[name = string("op_1055_cast_fp16")];
+            tensor<int32, [4]> var_1056 = const()[name = string("op_1056"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 448]> var_1057_cast_fp16 = reshape(shape = var_1056, x = key_7_cast_fp16)[name = string("op_1057_cast_fp16")];
+            bool mh_w_25_transpose_x_0 = const()[name = string("mh_w_25_transpose_x_0"), val = bool(true)];
+            bool mh_w_25_transpose_y_0 = const()[name = string("mh_w_25_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 12, 1, 448]> mh_w_25_cast_fp16 = matmul(transpose_x = mh_w_25_transpose_x_0, transpose_y = mh_w_25_transpose_y_0, x = var_1055_cast_fp16, y = var_1057_cast_fp16)[name = string("mh_w_25_cast_fp16")];
+            tensor<fp16, [1, 12, 1, 448]> mh_w_27_cast_fp16 = add(x = mh_w_25_cast_fp16, y = var_219_cast_fp16)[name = string("mh_w_27_cast_fp16")];
+            tensor<fp16, [1, 12, 1, 448]> var_1065_cast_fp16 = softmax(axis = var_957, x = mh_w_27_cast_fp16)[name = string("op_1065_cast_fp16")];
+            tensor<int32, [4]> var_1066 = const()[name = string("op_1066"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 448]> var_1067_cast_fp16 = reshape(shape = var_1066, x = value_7_cast_fp16)[name = string("op_1067_cast_fp16")];
+            bool attn_13_transpose_x_0 = const()[name = string("attn_13_transpose_x_0"), val = bool(false)];
+            bool attn_13_transpose_y_0 = const()[name = string("attn_13_transpose_y_0"), val = bool(true)];
+            tensor<fp16, [1, 12, 64, 1]> attn_13_cast_fp16 = matmul(transpose_x = attn_13_transpose_x_0, transpose_y = attn_13_transpose_y_0, x = var_1067_cast_fp16, y = var_1065_cast_fp16)[name = string("attn_13_cast_fp16")];
+            tensor<int32, [4]> var_1070 = const()[name = string("op_1070"), val = tensor<int32, [4]>([1, 768, 1, -1])];
+            tensor<fp16, [1, 768, 1, 1]> input_31_cast_fp16 = reshape(shape = var_1070, x = attn_13_cast_fp16)[name = string("input_31_cast_fp16")];
+            string var_1080_pad_type_0 = const()[name = string("op_1080_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1080_strides_0 = const()[name = string("op_1080_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1080_pad_0 = const()[name = string("op_1080_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1080_dilations_0 = const()[name = string("op_1080_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1080_groups_0 = const()[name = string("op_1080_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_3_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(97615616))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(97910592))))[name = string("layers_3_self_attn_o_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_3_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_3_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(97910720)))];
+            tensor<fp16, [1, 768, 1, 1]> var_1080_cast_fp16 = conv(bias = layers_3_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_1080_dilations_0, groups = var_1080_groups_0, pad = var_1080_pad_0, pad_type = var_1080_pad_type_0, strides = var_1080_strides_0, weight = layers_3_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_31_cast_fp16)[name = string("op_1080_cast_fp16")];
+            string var_1086_pad_type_0 = const()[name = string("op_1086_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1086_strides_0 = const()[name = string("op_1086_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1086_pad_0 = const()[name = string("op_1086_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1086_dilations_0 = const()[name = string("op_1086_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1086_groups_0 = const()[name = string("op_1086_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_3_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(97923584))), nonzero_data = tensor<fp16, [5588]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(97912320))))[name = string("layers_3_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1]> var_1086_cast_fp16 = conv(dilations = var_1086_dilations_0, groups = var_1086_groups_0, pad = var_1086_pad_0, pad_type = var_1086_pad_type_0, strides = var_1086_strides_0, weight = layers_3_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_31_cast_fp16)[name = string("op_1086_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> obj_67_cast_fp16 = add(x = var_1080_cast_fp16, y = var_1086_cast_fp16)[name = string("obj_67_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> inputs_21_cast_fp16 = add(x = inputs_19_cast_fp16, y = obj_67_cast_fp16)[name = string("inputs_21_cast_fp16")];
+            tensor<int32, [1]> out_21_axes_0 = const()[name = string("out_21_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_1101_to_fp16 = const()[name = string("op_1101_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> out_21_cast_fp16 = layer_norm(axes = out_21_axes_0, epsilon = var_1101_to_fp16, x = inputs_21_cast_fp16)[name = string("out_21_cast_fp16")];
+            tensor<fp16, [768]> obj_69_gamma_0_to_fp16 = const()[name = string("obj_69_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(97997376)))];
+            tensor<fp16, [768]> obj_69_beta_0_to_fp16 = const()[name = string("obj_69_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(97998976)))];
+            fp16 obj_69_epsilon_0_to_fp16 = const()[name = string("obj_69_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> obj_69_cast_fp16 = batch_norm(beta = obj_69_beta_0_to_fp16, epsilon = obj_69_epsilon_0_to_fp16, gamma = obj_69_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_21_cast_fp16)[name = string("obj_69_cast_fp16")];
+            string var_1121_pad_type_0 = const()[name = string("op_1121_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1121_strides_0 = const()[name = string("op_1121_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1121_pad_0 = const()[name = string("op_1121_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1121_dilations_0 = const()[name = string("op_1121_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1121_groups_0 = const()[name = string("op_1121_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_3_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(98000576))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(98295552))))[name = string("layers_3_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_3_encoder_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_3_encoder_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(98295680)))];
+            tensor<fp16, [1, 768, 1, 1]> var_1121_cast_fp16 = conv(bias = layers_3_encoder_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_1121_dilations_0, groups = var_1121_groups_0, pad = var_1121_pad_0, pad_type = var_1121_pad_type_0, strides = var_1121_strides_0, weight = layers_3_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_69_cast_fp16)[name = string("op_1121_cast_fp16")];
+            string var_1127_pad_type_0 = const()[name = string("op_1127_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1127_strides_0 = const()[name = string("op_1127_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1127_pad_0 = const()[name = string("op_1127_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1127_dilations_0 = const()[name = string("op_1127_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1127_groups_0 = const()[name = string("op_1127_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_3_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(98307712))), nonzero_data = tensor<fp16, [5181]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(98297280))))[name = string("layers_3_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1]> var_1127_cast_fp16 = conv(dilations = var_1127_dilations_0, groups = var_1127_groups_0, pad = var_1127_pad_0, pad_type = var_1127_pad_type_0, strides = var_1127_strides_0, weight = layers_3_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_69_cast_fp16)[name = string("op_1127_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> query_15_cast_fp16 = add(x = var_1121_cast_fp16, y = var_1127_cast_fp16)[name = string("query_15_cast_fp16")];
+            tensor<int32, [4]> var_1130 = const()[name = string("op_1130"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1]> mh_q_15_cast_fp16 = reshape(shape = var_1130, x = query_15_cast_fp16)[name = string("mh_q_15_cast_fp16")];
+            fp16 var_1132_to_fp16 = const()[name = string("op_1132_to_fp16"), val = fp16(0x1p-3)];
+            tensor<fp16, [1, 12, 64, 1]> var_1133_cast_fp16 = mul(x = mh_q_15_cast_fp16, y = var_1132_to_fp16)[name = string("op_1133_cast_fp16")];
+            tensor<int32, [4]> var_1134 = const()[name = string("op_1134"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1536]> var_1135_cast_fp16 = reshape(shape = var_1134, x = obj_71_cast_fp16)[name = string("op_1135_cast_fp16")];
+            bool mh_w_29_transpose_x_0 = const()[name = string("mh_w_29_transpose_x_0"), val = bool(true)];
+            bool mh_w_29_transpose_y_0 = const()[name = string("mh_w_29_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 12, 1, 1536]> mh_w_29_cast_fp16 = matmul(transpose_x = mh_w_29_transpose_x_0, transpose_y = mh_w_29_transpose_y_0, x = var_1133_cast_fp16, y = var_1135_cast_fp16)[name = string("mh_w_29_cast_fp16")];
+            tensor<fp16, [1, 12, 1, 1536]> mh_w_31_cast_fp16 = add(x = mh_w_29_cast_fp16, y = var_297_cast_fp16)[name = string("mh_w_31_cast_fp16")];
+            tensor<fp16, [1, 12, 1, 1536]> obj_77_cast_fp16 = softmax(axis = var_957, x = mh_w_31_cast_fp16)[name = string("obj_77_cast_fp16")];
+            tensor<int32, [4]> var_1144 = const()[name = string("op_1144"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1536]> var_1145_cast_fp16 = reshape(shape = var_1144, x = obj_73_cast_fp16)[name = string("op_1145_cast_fp16")];
+            bool attn_15_transpose_x_0 = const()[name = string("attn_15_transpose_x_0"), val = bool(false)];
+            bool attn_15_transpose_y_0 = const()[name = string("attn_15_transpose_y_0"), val = bool(true)];
+            tensor<fp16, [1, 12, 64, 1]> attn_15_cast_fp16 = matmul(transpose_x = attn_15_transpose_x_0, transpose_y = attn_15_transpose_y_0, x = var_1145_cast_fp16, y = obj_77_cast_fp16)[name = string("attn_15_cast_fp16")];
+            tensor<int32, [4]> var_1148 = const()[name = string("op_1148"), val = tensor<int32, [4]>([1, 768, 1, -1])];
+            tensor<fp16, [1, 768, 1, 1]> input_33_cast_fp16 = reshape(shape = var_1148, x = attn_15_cast_fp16)[name = string("input_33_cast_fp16")];
+            string var_1158_pad_type_0 = const()[name = string("op_1158_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1158_strides_0 = const()[name = string("op_1158_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1158_pad_0 = const()[name = string("op_1158_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1158_dilations_0 = const()[name = string("op_1158_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1158_groups_0 = const()[name = string("op_1158_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_3_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(98381504))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(98676480))))[name = string("layers_3_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_3_encoder_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_3_encoder_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(98676608)))];
+            tensor<fp16, [1, 768, 1, 1]> var_1158_cast_fp16 = conv(bias = layers_3_encoder_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_1158_dilations_0, groups = var_1158_groups_0, pad = var_1158_pad_0, pad_type = var_1158_pad_type_0, strides = var_1158_strides_0, weight = layers_3_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_33_cast_fp16)[name = string("op_1158_cast_fp16")];
+            string var_1164_pad_type_0 = const()[name = string("op_1164_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1164_strides_0 = const()[name = string("op_1164_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1164_pad_0 = const()[name = string("op_1164_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1164_dilations_0 = const()[name = string("op_1164_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1164_groups_0 = const()[name = string("op_1164_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_3_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(98687168))), nonzero_data = tensor<fp16, [4420]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(98678208))))[name = string("layers_3_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1]> var_1164_cast_fp16 = conv(dilations = var_1164_dilations_0, groups = var_1164_groups_0, pad = var_1164_pad_0, pad_type = var_1164_pad_type_0, strides = var_1164_strides_0, weight = layers_3_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_33_cast_fp16)[name = string("op_1164_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> obj_75_cast_fp16 = add(x = var_1158_cast_fp16, y = var_1164_cast_fp16)[name = string("obj_75_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> inputs_23_cast_fp16 = add(x = inputs_21_cast_fp16, y = obj_75_cast_fp16)[name = string("inputs_23_cast_fp16")];
+            tensor<int32, [1]> out_23_axes_0 = const()[name = string("out_23_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_1175_to_fp16 = const()[name = string("op_1175_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> out_23_cast_fp16 = layer_norm(axes = out_23_axes_0, epsilon = var_1175_to_fp16, x = inputs_23_cast_fp16)[name = string("out_23_cast_fp16")];
+            tensor<fp16, [768]> input_35_gamma_0_to_fp16 = const()[name = string("input_35_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(98760960)))];
+            tensor<fp16, [768]> input_35_beta_0_to_fp16 = const()[name = string("input_35_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(98762560)))];
+            fp16 input_35_epsilon_0_to_fp16 = const()[name = string("input_35_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> input_35_cast_fp16 = batch_norm(beta = input_35_beta_0_to_fp16, epsilon = input_35_epsilon_0_to_fp16, gamma = input_35_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_23_cast_fp16)[name = string("input_35_cast_fp16")];
+            string var_1193_pad_type_0 = const()[name = string("op_1193_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1193_strides_0 = const()[name = string("op_1193_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1193_pad_0 = const()[name = string("op_1193_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1193_dilations_0 = const()[name = string("op_1193_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1193_groups_0 = const()[name = string("op_1193_groups_0"), val = int32(1)];
+            tensor<fp16, [3072, 768, 1, 1]> layers_3_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [3072, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(98764160))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(99943872))))[name = string("layers_3_fc1_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [3072]> layers_3_fc1_inlier_module_bias_to_fp16 = const()[name = string("layers_3_fc1_inlier_module_bias_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(99944000)))];
+            tensor<fp16, [1, 3072, 1, 1]> var_1193_cast_fp16 = conv(bias = layers_3_fc1_inlier_module_bias_to_fp16, dilations = var_1193_dilations_0, groups = var_1193_groups_0, pad = var_1193_pad_0, pad_type = var_1193_pad_type_0, strides = var_1193_strides_0, weight = layers_3_fc1_inlier_module_weight_to_fp16_palettized, x = input_35_cast_fp16)[name = string("op_1193_cast_fp16")];
+            string var_1199_pad_type_0 = const()[name = string("op_1199_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1199_strides_0 = const()[name = string("op_1199_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1199_pad_0 = const()[name = string("op_1199_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1199_dilations_0 = const()[name = string("op_1199_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1199_groups_0 = const()[name = string("op_1199_groups_0"), val = int32(1)];
+            tensor<fp16, [3072, 768, 1, 1]> layers_3_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [3072, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(99977600))), nonzero_data = tensor<fp16, [13656]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(99950208))))[name = string("layers_3_fc1_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 3072, 1, 1]> var_1199_cast_fp16 = conv(dilations = var_1199_dilations_0, groups = var_1199_groups_0, pad = var_1199_pad_0, pad_type = var_1199_pad_type_0, strides = var_1199_strides_0, weight = layers_3_fc1_outlier_module_weight_to_fp16_sparsified, x = input_35_cast_fp16)[name = string("op_1199_cast_fp16")];
+            tensor<fp16, [1, 3072, 1, 1]> input_37_cast_fp16 = add(x = var_1193_cast_fp16, y = var_1199_cast_fp16)[name = string("input_37_cast_fp16")];
+            string input_39_mode_0 = const()[name = string("input_39_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 3072, 1, 1]> input_39_cast_fp16 = gelu(mode = input_39_mode_0, x = input_37_cast_fp16)[name = string("input_39_cast_fp16")];
+            string var_1210_pad_type_0 = const()[name = string("op_1210_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1210_strides_0 = const()[name = string("op_1210_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1210_pad_0 = const()[name = string("op_1210_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1210_dilations_0 = const()[name = string("op_1210_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1210_groups_0 = const()[name = string("op_1210_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 3072, 1, 1]> layers_3_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(100272576))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(101452288))))[name = string("layers_3_fc2_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_3_fc2_inlier_module_bias_to_fp16 = const()[name = string("layers_3_fc2_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(101452416)))];
+            tensor<fp16, [1, 768, 1, 1]> var_1210_cast_fp16 = conv(bias = layers_3_fc2_inlier_module_bias_to_fp16, dilations = var_1210_dilations_0, groups = var_1210_groups_0, pad = var_1210_pad_0, pad_type = var_1210_pad_type_0, strides = var_1210_strides_0, weight = layers_3_fc2_inlier_module_weight_to_fp16_palettized, x = input_39_cast_fp16)[name = string("op_1210_cast_fp16")];
+            string var_1216_pad_type_0 = const()[name = string("op_1216_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1216_strides_0 = const()[name = string("op_1216_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1216_pad_0 = const()[name = string("op_1216_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1216_dilations_0 = const()[name = string("op_1216_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1216_groups_0 = const()[name = string("op_1216_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 3072, 1, 1]> layers_3_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(101478464))), nonzero_data = tensor<fp16, [12173]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(101454016))))[name = string("layers_3_fc2_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1]> var_1216_cast_fp16 = conv(dilations = var_1216_dilations_0, groups = var_1216_groups_0, pad = var_1216_pad_0, pad_type = var_1216_pad_type_0, strides = var_1216_strides_0, weight = layers_3_fc2_outlier_module_weight_to_fp16_sparsified, x = input_39_cast_fp16)[name = string("op_1216_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> hidden_states_9_cast_fp16 = add(x = var_1210_cast_fp16, y = var_1216_cast_fp16)[name = string("hidden_states_9_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> inputs_25_cast_fp16 = add(x = inputs_23_cast_fp16, y = hidden_states_9_cast_fp16)[name = string("inputs_25_cast_fp16")];
+            tensor<int32, [4]> obj_89_begin_0 = const()[name = string("obj_89_begin_0"), val = tensor<int32, [4]>([4, 0, 0, 0])];
+            tensor<int32, [4]> obj_89_end_0 = const()[name = string("obj_89_end_0"), val = tensor<int32, [4]>([5, 768, 1, 1536])];
+            tensor<bool, [4]> obj_89_end_mask_0 = const()[name = string("obj_89_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<fp16, [1, 768, 1, 1536]> obj_89_cast_fp16 = slice_by_index(begin = obj_89_begin_0, end = obj_89_end_0, end_mask = obj_89_end_mask_0, x = read_state_2)[name = string("obj_89_cast_fp16")];
+            tensor<int32, [4]> obj_91_begin_0 = const()[name = string("obj_91_begin_0"), val = tensor<int32, [4]>([4, 0, 0, 0])];
+            tensor<int32, [4]> obj_91_end_0 = const()[name = string("obj_91_end_0"), val = tensor<int32, [4]>([5, 768, 1, 1536])];
+            tensor<bool, [4]> obj_91_end_mask_0 = const()[name = string("obj_91_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<fp16, [1, 768, 1, 1536]> obj_91_cast_fp16 = slice_by_index(begin = obj_91_begin_0, end = obj_91_end_0, end_mask = obj_91_end_mask_0, x = read_state_3)[name = string("obj_91_cast_fp16")];
+            int32 var_1238 = const()[name = string("op_1238"), val = int32(3)];
+            tensor<int32, [1]> out_25_axes_0 = const()[name = string("out_25_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_1263_to_fp16 = const()[name = string("op_1263_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> out_25_cast_fp16 = layer_norm(axes = out_25_axes_0, epsilon = var_1263_to_fp16, x = inputs_25_cast_fp16)[name = string("out_25_cast_fp16")];
+            tensor<fp16, [768]> obj_79_gamma_0_to_fp16 = const()[name = string("obj_79_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(101773440)))];
+            tensor<fp16, [768]> obj_79_beta_0_to_fp16 = const()[name = string("obj_79_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(101775040)))];
+            fp16 obj_79_epsilon_0_to_fp16 = const()[name = string("obj_79_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> obj_79_cast_fp16 = batch_norm(beta = obj_79_beta_0_to_fp16, epsilon = obj_79_epsilon_0_to_fp16, gamma = obj_79_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_25_cast_fp16)[name = string("obj_79_cast_fp16")];
+            string var_1285_pad_type_0 = const()[name = string("op_1285_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1285_strides_0 = const()[name = string("op_1285_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1285_pad_0 = const()[name = string("op_1285_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1285_dilations_0 = const()[name = string("op_1285_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1285_groups_0 = const()[name = string("op_1285_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_4_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(101776640))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(102071616))))[name = string("layers_4_self_attn_q_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_4_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_4_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(102071744)))];
+            tensor<fp16, [1, 768, 1, 1]> var_1285_cast_fp16 = conv(bias = layers_4_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_1285_dilations_0, groups = var_1285_groups_0, pad = var_1285_pad_0, pad_type = var_1285_pad_type_0, strides = var_1285_strides_0, weight = layers_4_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_79_cast_fp16)[name = string("op_1285_cast_fp16")];
+            string var_1291_pad_type_0 = const()[name = string("op_1291_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1291_strides_0 = const()[name = string("op_1291_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1291_pad_0 = const()[name = string("op_1291_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1291_dilations_0 = const()[name = string("op_1291_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1291_groups_0 = const()[name = string("op_1291_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_4_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(102081536))), nonzero_data = tensor<fp16, [4049]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(102073344))))[name = string("layers_4_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1]> var_1291_cast_fp16 = conv(dilations = var_1291_dilations_0, groups = var_1291_groups_0, pad = var_1291_pad_0, pad_type = var_1291_pad_type_0, strides = var_1291_strides_0, weight = layers_4_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_79_cast_fp16)[name = string("op_1291_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> query_17_cast_fp16 = add(x = var_1285_cast_fp16, y = var_1291_cast_fp16)[name = string("query_17_cast_fp16")];
+            string var_1300_pad_type_0 = const()[name = string("op_1300_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1300_strides_0 = const()[name = string("op_1300_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1300_pad_0 = const()[name = string("op_1300_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1300_dilations_0 = const()[name = string("op_1300_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1300_groups_0 = const()[name = string("op_1300_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_4_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(102155328))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(102450304))))[name = string("layers_4_self_attn_k_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1, 768, 1, 1]> var_1300_cast_fp16 = conv(dilations = var_1300_dilations_0, groups = var_1300_groups_0, pad = var_1300_pad_0, pad_type = var_1300_pad_type_0, strides = var_1300_strides_0, weight = layers_4_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_79_cast_fp16)[name = string("op_1300_cast_fp16")];
+            string var_1306_pad_type_0 = const()[name = string("op_1306_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1306_strides_0 = const()[name = string("op_1306_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1306_pad_0 = const()[name = string("op_1306_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1306_dilations_0 = const()[name = string("op_1306_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1306_groups_0 = const()[name = string("op_1306_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_4_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(102459584))), nonzero_data = tensor<fp16, [4525]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(102450432))))[name = string("layers_4_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1]> var_1306_cast_fp16 = conv(dilations = var_1306_dilations_0, groups = var_1306_groups_0, pad = var_1306_pad_0, pad_type = var_1306_pad_type_0, strides = var_1306_strides_0, weight = layers_4_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_79_cast_fp16)[name = string("op_1306_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> current_key_9_cast_fp16 = add(x = var_1300_cast_fp16, y = var_1306_cast_fp16)[name = string("current_key_9_cast_fp16")];
+            string var_1316_pad_type_0 = const()[name = string("op_1316_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1316_strides_0 = const()[name = string("op_1316_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1316_pad_0 = const()[name = string("op_1316_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1316_dilations_0 = const()[name = string("op_1316_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1316_groups_0 = const()[name = string("op_1316_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_4_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(102533376))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(102828352))))[name = string("layers_4_self_attn_v_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_4_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_4_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(102828480)))];
+            tensor<fp16, [1, 768, 1, 1]> var_1316_cast_fp16 = conv(bias = layers_4_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_1316_dilations_0, groups = var_1316_groups_0, pad = var_1316_pad_0, pad_type = var_1316_pad_type_0, strides = var_1316_strides_0, weight = layers_4_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_79_cast_fp16)[name = string("op_1316_cast_fp16")];
+            string var_1322_pad_type_0 = const()[name = string("op_1322_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1322_strides_0 = const()[name = string("op_1322_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1322_pad_0 = const()[name = string("op_1322_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1322_dilations_0 = const()[name = string("op_1322_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1322_groups_0 = const()[name = string("op_1322_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_4_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(102836800))), nonzero_data = tensor<fp16, [3297]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(102830080))))[name = string("layers_4_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1]> var_1322_cast_fp16 = conv(dilations = var_1322_dilations_0, groups = var_1322_groups_0, pad = var_1322_pad_0, pad_type = var_1322_pad_type_0, strides = var_1322_strides_0, weight = layers_4_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_79_cast_fp16)[name = string("op_1322_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> current_value_9_cast_fp16 = add(x = var_1316_cast_fp16, y = var_1322_cast_fp16)[name = string("current_value_9_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 448]> var_1328_cast_fp16 = mul(x = current_key_9_cast_fp16, y = var_202_cast_fp16)[name = string("op_1328_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 448]> key_9_cast_fp16 = add(x = var_71_cast_fp16_4, y = var_1328_cast_fp16)[name = string("key_9_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 448]> var_1330_cast_fp16 = mul(x = current_value_9_cast_fp16, y = var_202_cast_fp16)[name = string("op_1330_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 448]> value_9_cast_fp16 = add(x = var_86_cast_fp16_4, y = var_1330_cast_fp16)[name = string("value_9_cast_fp16")];
+            tensor<int32, [4]> var_1333 = const()[name = string("op_1333"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1]> mh_q_17_cast_fp16 = reshape(shape = var_1333, x = query_17_cast_fp16)[name = string("mh_q_17_cast_fp16")];
+            fp16 var_1335_to_fp16 = const()[name = string("op_1335_to_fp16"), val = fp16(0x1p-3)];
+            tensor<fp16, [1, 12, 64, 1]> var_1336_cast_fp16 = mul(x = mh_q_17_cast_fp16, y = var_1335_to_fp16)[name = string("op_1336_cast_fp16")];
+            tensor<int32, [4]> var_1337 = const()[name = string("op_1337"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 448]> var_1338_cast_fp16 = reshape(shape = var_1337, x = key_9_cast_fp16)[name = string("op_1338_cast_fp16")];
+            bool mh_w_33_transpose_x_0 = const()[name = string("mh_w_33_transpose_x_0"), val = bool(true)];
+            bool mh_w_33_transpose_y_0 = const()[name = string("mh_w_33_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 12, 1, 448]> mh_w_33_cast_fp16 = matmul(transpose_x = mh_w_33_transpose_x_0, transpose_y = mh_w_33_transpose_y_0, x = var_1336_cast_fp16, y = var_1338_cast_fp16)[name = string("mh_w_33_cast_fp16")];
+            tensor<fp16, [1, 12, 1, 448]> mh_w_35_cast_fp16 = add(x = mh_w_33_cast_fp16, y = var_219_cast_fp16)[name = string("mh_w_35_cast_fp16")];
+            tensor<fp16, [1, 12, 1, 448]> var_1346_cast_fp16 = softmax(axis = var_1238, x = mh_w_35_cast_fp16)[name = string("op_1346_cast_fp16")];
+            tensor<int32, [4]> var_1347 = const()[name = string("op_1347"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 448]> var_1348_cast_fp16 = reshape(shape = var_1347, x = value_9_cast_fp16)[name = string("op_1348_cast_fp16")];
+            bool attn_17_transpose_x_0 = const()[name = string("attn_17_transpose_x_0"), val = bool(false)];
+            bool attn_17_transpose_y_0 = const()[name = string("attn_17_transpose_y_0"), val = bool(true)];
+            tensor<fp16, [1, 12, 64, 1]> attn_17_cast_fp16 = matmul(transpose_x = attn_17_transpose_x_0, transpose_y = attn_17_transpose_y_0, x = var_1348_cast_fp16, y = var_1346_cast_fp16)[name = string("attn_17_cast_fp16")];
+            tensor<int32, [4]> var_1351 = const()[name = string("op_1351"), val = tensor<int32, [4]>([1, 768, 1, -1])];
+            tensor<fp16, [1, 768, 1, 1]> input_41_cast_fp16 = reshape(shape = var_1351, x = attn_17_cast_fp16)[name = string("input_41_cast_fp16")];
+            string var_1361_pad_type_0 = const()[name = string("op_1361_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1361_strides_0 = const()[name = string("op_1361_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1361_pad_0 = const()[name = string("op_1361_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1361_dilations_0 = const()[name = string("op_1361_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1361_groups_0 = const()[name = string("op_1361_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_4_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(102910592))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(103205568))))[name = string("layers_4_self_attn_o_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_4_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_4_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(103205696)))];
+            tensor<fp16, [1, 768, 1, 1]> var_1361_cast_fp16 = conv(bias = layers_4_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_1361_dilations_0, groups = var_1361_groups_0, pad = var_1361_pad_0, pad_type = var_1361_pad_type_0, strides = var_1361_strides_0, weight = layers_4_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_41_cast_fp16)[name = string("op_1361_cast_fp16")];
+            string var_1367_pad_type_0 = const()[name = string("op_1367_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1367_strides_0 = const()[name = string("op_1367_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1367_pad_0 = const()[name = string("op_1367_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1367_dilations_0 = const()[name = string("op_1367_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1367_groups_0 = const()[name = string("op_1367_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_4_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(103216512))), nonzero_data = tensor<fp16, [4545]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(103207296))))[name = string("layers_4_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1]> var_1367_cast_fp16 = conv(dilations = var_1367_dilations_0, groups = var_1367_groups_0, pad = var_1367_pad_0, pad_type = var_1367_pad_type_0, strides = var_1367_strides_0, weight = layers_4_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_41_cast_fp16)[name = string("op_1367_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> obj_85_cast_fp16 = add(x = var_1361_cast_fp16, y = var_1367_cast_fp16)[name = string("obj_85_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> inputs_27_cast_fp16 = add(x = inputs_25_cast_fp16, y = obj_85_cast_fp16)[name = string("inputs_27_cast_fp16")];
+            tensor<int32, [1]> out_27_axes_0 = const()[name = string("out_27_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_1382_to_fp16 = const()[name = string("op_1382_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> out_27_cast_fp16 = layer_norm(axes = out_27_axes_0, epsilon = var_1382_to_fp16, x = inputs_27_cast_fp16)[name = string("out_27_cast_fp16")];
+            tensor<fp16, [768]> obj_87_gamma_0_to_fp16 = const()[name = string("obj_87_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(103290304)))];
+            tensor<fp16, [768]> obj_87_beta_0_to_fp16 = const()[name = string("obj_87_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(103291904)))];
+            fp16 obj_87_epsilon_0_to_fp16 = const()[name = string("obj_87_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> obj_87_cast_fp16 = batch_norm(beta = obj_87_beta_0_to_fp16, epsilon = obj_87_epsilon_0_to_fp16, gamma = obj_87_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_27_cast_fp16)[name = string("obj_87_cast_fp16")];
+            string var_1402_pad_type_0 = const()[name = string("op_1402_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1402_strides_0 = const()[name = string("op_1402_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1402_pad_0 = const()[name = string("op_1402_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1402_dilations_0 = const()[name = string("op_1402_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1402_groups_0 = const()[name = string("op_1402_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_4_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(103293504))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(103588480))))[name = string("layers_4_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_4_encoder_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_4_encoder_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(103588608)))];
+            tensor<fp16, [1, 768, 1, 1]> var_1402_cast_fp16 = conv(bias = layers_4_encoder_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_1402_dilations_0, groups = var_1402_groups_0, pad = var_1402_pad_0, pad_type = var_1402_pad_type_0, strides = var_1402_strides_0, weight = layers_4_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_87_cast_fp16)[name = string("op_1402_cast_fp16")];
+            string var_1408_pad_type_0 = const()[name = string("op_1408_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1408_strides_0 = const()[name = string("op_1408_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1408_pad_0 = const()[name = string("op_1408_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1408_dilations_0 = const()[name = string("op_1408_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1408_groups_0 = const()[name = string("op_1408_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_4_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(103598720))), nonzero_data = tensor<fp16, [4224]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(103590208))))[name = string("layers_4_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1]> var_1408_cast_fp16 = conv(dilations = var_1408_dilations_0, groups = var_1408_groups_0, pad = var_1408_pad_0, pad_type = var_1408_pad_type_0, strides = var_1408_strides_0, weight = layers_4_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_87_cast_fp16)[name = string("op_1408_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> query_19_cast_fp16 = add(x = var_1402_cast_fp16, y = var_1408_cast_fp16)[name = string("query_19_cast_fp16")];
+            tensor<int32, [4]> var_1411 = const()[name = string("op_1411"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1]> mh_q_19_cast_fp16 = reshape(shape = var_1411, x = query_19_cast_fp16)[name = string("mh_q_19_cast_fp16")];
+            fp16 var_1413_to_fp16 = const()[name = string("op_1413_to_fp16"), val = fp16(0x1p-3)];
+            tensor<fp16, [1, 12, 64, 1]> var_1414_cast_fp16 = mul(x = mh_q_19_cast_fp16, y = var_1413_to_fp16)[name = string("op_1414_cast_fp16")];
+            tensor<int32, [4]> var_1415 = const()[name = string("op_1415"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1536]> var_1416_cast_fp16 = reshape(shape = var_1415, x = obj_89_cast_fp16)[name = string("op_1416_cast_fp16")];
+            bool mh_w_37_transpose_x_0 = const()[name = string("mh_w_37_transpose_x_0"), val = bool(true)];
+            bool mh_w_37_transpose_y_0 = const()[name = string("mh_w_37_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 12, 1, 1536]> mh_w_37_cast_fp16 = matmul(transpose_x = mh_w_37_transpose_x_0, transpose_y = mh_w_37_transpose_y_0, x = var_1414_cast_fp16, y = var_1416_cast_fp16)[name = string("mh_w_37_cast_fp16")];
+            tensor<fp16, [1, 12, 1, 1536]> mh_w_39_cast_fp16 = add(x = mh_w_37_cast_fp16, y = var_297_cast_fp16)[name = string("mh_w_39_cast_fp16")];
+            tensor<fp16, [1, 12, 1, 1536]> obj_95_cast_fp16 = softmax(axis = var_1238, x = mh_w_39_cast_fp16)[name = string("obj_95_cast_fp16")];
+            tensor<int32, [4]> var_1425 = const()[name = string("op_1425"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1536]> var_1426_cast_fp16 = reshape(shape = var_1425, x = obj_91_cast_fp16)[name = string("op_1426_cast_fp16")];
+            bool attn_19_transpose_x_0 = const()[name = string("attn_19_transpose_x_0"), val = bool(false)];
+            bool attn_19_transpose_y_0 = const()[name = string("attn_19_transpose_y_0"), val = bool(true)];
+            tensor<fp16, [1, 12, 64, 1]> attn_19_cast_fp16 = matmul(transpose_x = attn_19_transpose_x_0, transpose_y = attn_19_transpose_y_0, x = var_1426_cast_fp16, y = obj_95_cast_fp16)[name = string("attn_19_cast_fp16")];
+            tensor<int32, [4]> var_1429 = const()[name = string("op_1429"), val = tensor<int32, [4]>([1, 768, 1, -1])];
+            tensor<fp16, [1, 768, 1, 1]> input_43_cast_fp16 = reshape(shape = var_1429, x = attn_19_cast_fp16)[name = string("input_43_cast_fp16")];
+            string var_1439_pad_type_0 = const()[name = string("op_1439_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1439_strides_0 = const()[name = string("op_1439_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1439_pad_0 = const()[name = string("op_1439_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1439_dilations_0 = const()[name = string("op_1439_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1439_groups_0 = const()[name = string("op_1439_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_4_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(103672512))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(103967488))))[name = string("layers_4_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_4_encoder_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_4_encoder_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(103967616)))];
+            tensor<fp16, [1, 768, 1, 1]> var_1439_cast_fp16 = conv(bias = layers_4_encoder_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_1439_dilations_0, groups = var_1439_groups_0, pad = var_1439_pad_0, pad_type = var_1439_pad_type_0, strides = var_1439_strides_0, weight = layers_4_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_43_cast_fp16)[name = string("op_1439_cast_fp16")];
+            string var_1445_pad_type_0 = const()[name = string("op_1445_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1445_strides_0 = const()[name = string("op_1445_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1445_pad_0 = const()[name = string("op_1445_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1445_dilations_0 = const()[name = string("op_1445_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1445_groups_0 = const()[name = string("op_1445_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_4_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(103977024))), nonzero_data = tensor<fp16, [3861]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(103969216))))[name = string("layers_4_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1]> var_1445_cast_fp16 = conv(dilations = var_1445_dilations_0, groups = var_1445_groups_0, pad = var_1445_pad_0, pad_type = var_1445_pad_type_0, strides = var_1445_strides_0, weight = layers_4_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_43_cast_fp16)[name = string("op_1445_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> obj_93_cast_fp16 = add(x = var_1439_cast_fp16, y = var_1445_cast_fp16)[name = string("obj_93_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> inputs_29_cast_fp16 = add(x = inputs_27_cast_fp16, y = obj_93_cast_fp16)[name = string("inputs_29_cast_fp16")];
+            tensor<int32, [1]> out_29_axes_0 = const()[name = string("out_29_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_1456_to_fp16 = const()[name = string("op_1456_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> out_29_cast_fp16 = layer_norm(axes = out_29_axes_0, epsilon = var_1456_to_fp16, x = inputs_29_cast_fp16)[name = string("out_29_cast_fp16")];
+            tensor<fp16, [768]> input_45_gamma_0_to_fp16 = const()[name = string("input_45_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(104050816)))];
+            tensor<fp16, [768]> input_45_beta_0_to_fp16 = const()[name = string("input_45_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(104052416)))];
+            fp16 input_45_epsilon_0_to_fp16 = const()[name = string("input_45_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> input_45_cast_fp16 = batch_norm(beta = input_45_beta_0_to_fp16, epsilon = input_45_epsilon_0_to_fp16, gamma = input_45_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_29_cast_fp16)[name = string("input_45_cast_fp16")];
+            string var_1474_pad_type_0 = const()[name = string("op_1474_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1474_strides_0 = const()[name = string("op_1474_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1474_pad_0 = const()[name = string("op_1474_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1474_dilations_0 = const()[name = string("op_1474_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1474_groups_0 = const()[name = string("op_1474_groups_0"), val = int32(1)];
+            tensor<fp16, [3072, 768, 1, 1]> layers_4_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [3072, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(104054016))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(105233728))))[name = string("layers_4_fc1_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [3072]> layers_4_fc1_inlier_module_bias_to_fp16 = const()[name = string("layers_4_fc1_inlier_module_bias_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(105233856)))];
+            tensor<fp16, [1, 3072, 1, 1]> var_1474_cast_fp16 = conv(bias = layers_4_fc1_inlier_module_bias_to_fp16, dilations = var_1474_dilations_0, groups = var_1474_groups_0, pad = var_1474_pad_0, pad_type = var_1474_pad_type_0, strides = var_1474_strides_0, weight = layers_4_fc1_inlier_module_weight_to_fp16_palettized, x = input_45_cast_fp16)[name = string("op_1474_cast_fp16")];
+            string var_1480_pad_type_0 = const()[name = string("op_1480_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1480_strides_0 = const()[name = string("op_1480_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1480_pad_0 = const()[name = string("op_1480_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1480_dilations_0 = const()[name = string("op_1480_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1480_groups_0 = const()[name = string("op_1480_groups_0"), val = int32(1)];
+            tensor<fp16, [3072, 768, 1, 1]> layers_4_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [3072, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(105261376))), nonzero_data = tensor<fp16, [10593]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(105240064))))[name = string("layers_4_fc1_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 3072, 1, 1]> var_1480_cast_fp16 = conv(dilations = var_1480_dilations_0, groups = var_1480_groups_0, pad = var_1480_pad_0, pad_type = var_1480_pad_type_0, strides = var_1480_strides_0, weight = layers_4_fc1_outlier_module_weight_to_fp16_sparsified, x = input_45_cast_fp16)[name = string("op_1480_cast_fp16")];
+            tensor<fp16, [1, 3072, 1, 1]> input_47_cast_fp16 = add(x = var_1474_cast_fp16, y = var_1480_cast_fp16)[name = string("input_47_cast_fp16")];
+            string input_49_mode_0 = const()[name = string("input_49_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 3072, 1, 1]> input_49_cast_fp16 = gelu(mode = input_49_mode_0, x = input_47_cast_fp16)[name = string("input_49_cast_fp16")];
+            string var_1491_pad_type_0 = const()[name = string("op_1491_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1491_strides_0 = const()[name = string("op_1491_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1491_pad_0 = const()[name = string("op_1491_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1491_dilations_0 = const()[name = string("op_1491_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1491_groups_0 = const()[name = string("op_1491_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 3072, 1, 1]> layers_4_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(105556352))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(106736064))))[name = string("layers_4_fc2_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_4_fc2_inlier_module_bias_to_fp16 = const()[name = string("layers_4_fc2_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(106736192)))];
+            tensor<fp16, [1, 768, 1, 1]> var_1491_cast_fp16 = conv(bias = layers_4_fc2_inlier_module_bias_to_fp16, dilations = var_1491_dilations_0, groups = var_1491_groups_0, pad = var_1491_pad_0, pad_type = var_1491_pad_type_0, strides = var_1491_strides_0, weight = layers_4_fc2_inlier_module_weight_to_fp16_palettized, x = input_49_cast_fp16)[name = string("op_1491_cast_fp16")];
+            string var_1497_pad_type_0 = const()[name = string("op_1497_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1497_strides_0 = const()[name = string("op_1497_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1497_pad_0 = const()[name = string("op_1497_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1497_dilations_0 = const()[name = string("op_1497_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1497_groups_0 = const()[name = string("op_1497_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 3072, 1, 1]> layers_4_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(106759168))), nonzero_data = tensor<fp16, [10641]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(106737792))))[name = string("layers_4_fc2_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1]> var_1497_cast_fp16 = conv(dilations = var_1497_dilations_0, groups = var_1497_groups_0, pad = var_1497_pad_0, pad_type = var_1497_pad_type_0, strides = var_1497_strides_0, weight = layers_4_fc2_outlier_module_weight_to_fp16_sparsified, x = input_49_cast_fp16)[name = string("op_1497_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> hidden_states_11_cast_fp16 = add(x = var_1491_cast_fp16, y = var_1497_cast_fp16)[name = string("hidden_states_11_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> inputs_31_cast_fp16 = add(x = inputs_29_cast_fp16, y = hidden_states_11_cast_fp16)[name = string("inputs_31_cast_fp16")];
+            tensor<int32, [4]> obj_107_begin_0 = const()[name = string("obj_107_begin_0"), val = tensor<int32, [4]>([5, 0, 0, 0])];
+            tensor<int32, [4]> obj_107_end_0 = const()[name = string("obj_107_end_0"), val = tensor<int32, [4]>([6, 768, 1, 1536])];
+            tensor<bool, [4]> obj_107_end_mask_0 = const()[name = string("obj_107_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<fp16, [1, 768, 1, 1536]> obj_107_cast_fp16 = slice_by_index(begin = obj_107_begin_0, end = obj_107_end_0, end_mask = obj_107_end_mask_0, x = read_state_2)[name = string("obj_107_cast_fp16")];
+            tensor<int32, [4]> obj_109_begin_0 = const()[name = string("obj_109_begin_0"), val = tensor<int32, [4]>([5, 0, 0, 0])];
+            tensor<int32, [4]> obj_109_end_0 = const()[name = string("obj_109_end_0"), val = tensor<int32, [4]>([6, 768, 1, 1536])];
+            tensor<bool, [4]> obj_109_end_mask_0 = const()[name = string("obj_109_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<fp16, [1, 768, 1, 1536]> obj_109_cast_fp16 = slice_by_index(begin = obj_109_begin_0, end = obj_109_end_0, end_mask = obj_109_end_mask_0, x = read_state_3)[name = string("obj_109_cast_fp16")];
+            int32 var_1519 = const()[name = string("op_1519"), val = int32(3)];
+            tensor<int32, [1]> out_31_axes_0 = const()[name = string("out_31_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_1544_to_fp16 = const()[name = string("op_1544_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> out_31_cast_fp16 = layer_norm(axes = out_31_axes_0, epsilon = var_1544_to_fp16, x = inputs_31_cast_fp16)[name = string("out_31_cast_fp16")];
+            tensor<fp16, [768]> obj_97_gamma_0_to_fp16 = const()[name = string("obj_97_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(107054144)))];
+            tensor<fp16, [768]> obj_97_beta_0_to_fp16 = const()[name = string("obj_97_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(107055744)))];
+            fp16 obj_97_epsilon_0_to_fp16 = const()[name = string("obj_97_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> obj_97_cast_fp16 = batch_norm(beta = obj_97_beta_0_to_fp16, epsilon = obj_97_epsilon_0_to_fp16, gamma = obj_97_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_31_cast_fp16)[name = string("obj_97_cast_fp16")];
+            string var_1566_pad_type_0 = const()[name = string("op_1566_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1566_strides_0 = const()[name = string("op_1566_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1566_pad_0 = const()[name = string("op_1566_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1566_dilations_0 = const()[name = string("op_1566_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1566_groups_0 = const()[name = string("op_1566_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_5_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(107057344))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(107352320))))[name = string("layers_5_self_attn_q_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_5_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_5_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(107352448)))];
+            tensor<fp16, [1, 768, 1, 1]> var_1566_cast_fp16 = conv(bias = layers_5_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_1566_dilations_0, groups = var_1566_groups_0, pad = var_1566_pad_0, pad_type = var_1566_pad_type_0, strides = var_1566_strides_0, weight = layers_5_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_97_cast_fp16)[name = string("op_1566_cast_fp16")];
+            string var_1572_pad_type_0 = const()[name = string("op_1572_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1572_strides_0 = const()[name = string("op_1572_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1572_pad_0 = const()[name = string("op_1572_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1572_dilations_0 = const()[name = string("op_1572_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1572_groups_0 = const()[name = string("op_1572_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_5_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(107362752))), nonzero_data = tensor<fp16, [4314]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(107354048))))[name = string("layers_5_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1]> var_1572_cast_fp16 = conv(dilations = var_1572_dilations_0, groups = var_1572_groups_0, pad = var_1572_pad_0, pad_type = var_1572_pad_type_0, strides = var_1572_strides_0, weight = layers_5_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_97_cast_fp16)[name = string("op_1572_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> query_21_cast_fp16 = add(x = var_1566_cast_fp16, y = var_1572_cast_fp16)[name = string("query_21_cast_fp16")];
+            string var_1581_pad_type_0 = const()[name = string("op_1581_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1581_strides_0 = const()[name = string("op_1581_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1581_pad_0 = const()[name = string("op_1581_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1581_dilations_0 = const()[name = string("op_1581_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1581_groups_0 = const()[name = string("op_1581_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_5_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(107436544))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(107731520))))[name = string("layers_5_self_attn_k_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1, 768, 1, 1]> var_1581_cast_fp16 = conv(dilations = var_1581_dilations_0, groups = var_1581_groups_0, pad = var_1581_pad_0, pad_type = var_1581_pad_type_0, strides = var_1581_strides_0, weight = layers_5_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_97_cast_fp16)[name = string("op_1581_cast_fp16")];
+            string var_1587_pad_type_0 = const()[name = string("op_1587_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1587_strides_0 = const()[name = string("op_1587_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1587_pad_0 = const()[name = string("op_1587_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1587_dilations_0 = const()[name = string("op_1587_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1587_groups_0 = const()[name = string("op_1587_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_5_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(107741056))), nonzero_data = tensor<fp16, [4661]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(107731648))))[name = string("layers_5_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1]> var_1587_cast_fp16 = conv(dilations = var_1587_dilations_0, groups = var_1587_groups_0, pad = var_1587_pad_0, pad_type = var_1587_pad_type_0, strides = var_1587_strides_0, weight = layers_5_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_97_cast_fp16)[name = string("op_1587_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> current_key_11_cast_fp16 = add(x = var_1581_cast_fp16, y = var_1587_cast_fp16)[name = string("current_key_11_cast_fp16")];
+            string var_1597_pad_type_0 = const()[name = string("op_1597_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1597_strides_0 = const()[name = string("op_1597_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1597_pad_0 = const()[name = string("op_1597_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1597_dilations_0 = const()[name = string("op_1597_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1597_groups_0 = const()[name = string("op_1597_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_5_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(107814848))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(108109824))))[name = string("layers_5_self_attn_v_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_5_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_5_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(108109952)))];
+            tensor<fp16, [1, 768, 1, 1]> var_1597_cast_fp16 = conv(bias = layers_5_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_1597_dilations_0, groups = var_1597_groups_0, pad = var_1597_pad_0, pad_type = var_1597_pad_type_0, strides = var_1597_strides_0, weight = layers_5_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_97_cast_fp16)[name = string("op_1597_cast_fp16")];
+            string var_1603_pad_type_0 = const()[name = string("op_1603_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1603_strides_0 = const()[name = string("op_1603_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1603_pad_0 = const()[name = string("op_1603_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1603_dilations_0 = const()[name = string("op_1603_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1603_groups_0 = const()[name = string("op_1603_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_5_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(108117632))), nonzero_data = tensor<fp16, [2999]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(108111552))))[name = string("layers_5_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1]> var_1603_cast_fp16 = conv(dilations = var_1603_dilations_0, groups = var_1603_groups_0, pad = var_1603_pad_0, pad_type = var_1603_pad_type_0, strides = var_1603_strides_0, weight = layers_5_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_97_cast_fp16)[name = string("op_1603_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> current_value_11_cast_fp16 = add(x = var_1597_cast_fp16, y = var_1603_cast_fp16)[name = string("current_value_11_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 448]> var_1609_cast_fp16 = mul(x = current_key_11_cast_fp16, y = var_202_cast_fp16)[name = string("op_1609_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 448]> key_11_cast_fp16 = add(x = var_71_cast_fp16_5, y = var_1609_cast_fp16)[name = string("key_11_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 448]> var_1611_cast_fp16 = mul(x = current_value_11_cast_fp16, y = var_202_cast_fp16)[name = string("op_1611_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 448]> value_11_cast_fp16 = add(x = var_86_cast_fp16_5, y = var_1611_cast_fp16)[name = string("value_11_cast_fp16")];
+            tensor<int32, [4]> var_1614 = const()[name = string("op_1614"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1]> mh_q_21_cast_fp16 = reshape(shape = var_1614, x = query_21_cast_fp16)[name = string("mh_q_21_cast_fp16")];
+            fp16 var_1616_to_fp16 = const()[name = string("op_1616_to_fp16"), val = fp16(0x1p-3)];
+            tensor<fp16, [1, 12, 64, 1]> var_1617_cast_fp16 = mul(x = mh_q_21_cast_fp16, y = var_1616_to_fp16)[name = string("op_1617_cast_fp16")];
+            tensor<int32, [4]> var_1618 = const()[name = string("op_1618"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 448]> var_1619_cast_fp16 = reshape(shape = var_1618, x = key_11_cast_fp16)[name = string("op_1619_cast_fp16")];
+            bool mh_w_41_transpose_x_0 = const()[name = string("mh_w_41_transpose_x_0"), val = bool(true)];
+            bool mh_w_41_transpose_y_0 = const()[name = string("mh_w_41_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 12, 1, 448]> mh_w_41_cast_fp16 = matmul(transpose_x = mh_w_41_transpose_x_0, transpose_y = mh_w_41_transpose_y_0, x = var_1617_cast_fp16, y = var_1619_cast_fp16)[name = string("mh_w_41_cast_fp16")];
+            tensor<fp16, [1, 12, 1, 448]> mh_w_43_cast_fp16 = add(x = mh_w_41_cast_fp16, y = var_219_cast_fp16)[name = string("mh_w_43_cast_fp16")];
+            tensor<fp16, [1, 12, 1, 448]> var_1627_cast_fp16 = softmax(axis = var_1519, x = mh_w_43_cast_fp16)[name = string("op_1627_cast_fp16")];
+            tensor<int32, [4]> var_1628 = const()[name = string("op_1628"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 448]> var_1629_cast_fp16 = reshape(shape = var_1628, x = value_11_cast_fp16)[name = string("op_1629_cast_fp16")];
+            bool attn_21_transpose_x_0 = const()[name = string("attn_21_transpose_x_0"), val = bool(false)];
+            bool attn_21_transpose_y_0 = const()[name = string("attn_21_transpose_y_0"), val = bool(true)];
+            tensor<fp16, [1, 12, 64, 1]> attn_21_cast_fp16 = matmul(transpose_x = attn_21_transpose_x_0, transpose_y = attn_21_transpose_y_0, x = var_1629_cast_fp16, y = var_1627_cast_fp16)[name = string("attn_21_cast_fp16")];
+            tensor<int32, [4]> var_1632 = const()[name = string("op_1632"), val = tensor<int32, [4]>([1, 768, 1, -1])];
+            tensor<fp16, [1, 768, 1, 1]> input_51_cast_fp16 = reshape(shape = var_1632, x = attn_21_cast_fp16)[name = string("input_51_cast_fp16")];
+            string var_1642_pad_type_0 = const()[name = string("op_1642_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1642_strides_0 = const()[name = string("op_1642_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1642_pad_0 = const()[name = string("op_1642_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1642_dilations_0 = const()[name = string("op_1642_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1642_groups_0 = const()[name = string("op_1642_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_5_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(108191424))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(108486400))))[name = string("layers_5_self_attn_o_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_5_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_5_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(108486528)))];
+            tensor<fp16, [1, 768, 1, 1]> var_1642_cast_fp16 = conv(bias = layers_5_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_1642_dilations_0, groups = var_1642_groups_0, pad = var_1642_pad_0, pad_type = var_1642_pad_type_0, strides = var_1642_strides_0, weight = layers_5_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_51_cast_fp16)[name = string("op_1642_cast_fp16")];
+            string var_1648_pad_type_0 = const()[name = string("op_1648_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1648_strides_0 = const()[name = string("op_1648_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1648_pad_0 = const()[name = string("op_1648_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1648_dilations_0 = const()[name = string("op_1648_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1648_groups_0 = const()[name = string("op_1648_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_5_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(108495488))), nonzero_data = tensor<fp16, [3625]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(108488128))))[name = string("layers_5_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1]> var_1648_cast_fp16 = conv(dilations = var_1648_dilations_0, groups = var_1648_groups_0, pad = var_1648_pad_0, pad_type = var_1648_pad_type_0, strides = var_1648_strides_0, weight = layers_5_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_51_cast_fp16)[name = string("op_1648_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> obj_103_cast_fp16 = add(x = var_1642_cast_fp16, y = var_1648_cast_fp16)[name = string("obj_103_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> inputs_33_cast_fp16 = add(x = inputs_31_cast_fp16, y = obj_103_cast_fp16)[name = string("inputs_33_cast_fp16")];
+            tensor<int32, [1]> out_33_axes_0 = const()[name = string("out_33_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_1663_to_fp16 = const()[name = string("op_1663_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> out_33_cast_fp16 = layer_norm(axes = out_33_axes_0, epsilon = var_1663_to_fp16, x = inputs_33_cast_fp16)[name = string("out_33_cast_fp16")];
+            tensor<fp16, [768]> obj_105_gamma_0_to_fp16 = const()[name = string("obj_105_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(108569280)))];
+            tensor<fp16, [768]> obj_105_beta_0_to_fp16 = const()[name = string("obj_105_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(108570880)))];
+            fp16 obj_105_epsilon_0_to_fp16 = const()[name = string("obj_105_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> obj_105_cast_fp16 = batch_norm(beta = obj_105_beta_0_to_fp16, epsilon = obj_105_epsilon_0_to_fp16, gamma = obj_105_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_33_cast_fp16)[name = string("obj_105_cast_fp16")];
+            string var_1683_pad_type_0 = const()[name = string("op_1683_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1683_strides_0 = const()[name = string("op_1683_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1683_pad_0 = const()[name = string("op_1683_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1683_dilations_0 = const()[name = string("op_1683_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1683_groups_0 = const()[name = string("op_1683_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_5_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(108572480))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(108867456))))[name = string("layers_5_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_5_encoder_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_5_encoder_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(108867584)))];
+            tensor<fp16, [1, 768, 1, 1]> var_1683_cast_fp16 = conv(bias = layers_5_encoder_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_1683_dilations_0, groups = var_1683_groups_0, pad = var_1683_pad_0, pad_type = var_1683_pad_type_0, strides = var_1683_strides_0, weight = layers_5_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_105_cast_fp16)[name = string("op_1683_cast_fp16")];
+            string var_1689_pad_type_0 = const()[name = string("op_1689_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1689_strides_0 = const()[name = string("op_1689_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1689_pad_0 = const()[name = string("op_1689_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1689_dilations_0 = const()[name = string("op_1689_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1689_groups_0 = const()[name = string("op_1689_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_5_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(108875648))), nonzero_data = tensor<fp16, [3172]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(108869184))))[name = string("layers_5_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1]> var_1689_cast_fp16 = conv(dilations = var_1689_dilations_0, groups = var_1689_groups_0, pad = var_1689_pad_0, pad_type = var_1689_pad_type_0, strides = var_1689_strides_0, weight = layers_5_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_105_cast_fp16)[name = string("op_1689_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> query_23_cast_fp16 = add(x = var_1683_cast_fp16, y = var_1689_cast_fp16)[name = string("query_23_cast_fp16")];
+            tensor<int32, [4]> var_1692 = const()[name = string("op_1692"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1]> mh_q_23_cast_fp16 = reshape(shape = var_1692, x = query_23_cast_fp16)[name = string("mh_q_23_cast_fp16")];
+            fp16 var_1694_to_fp16 = const()[name = string("op_1694_to_fp16"), val = fp16(0x1p-3)];
+            tensor<fp16, [1, 12, 64, 1]> var_1695_cast_fp16 = mul(x = mh_q_23_cast_fp16, y = var_1694_to_fp16)[name = string("op_1695_cast_fp16")];
+            tensor<int32, [4]> var_1696 = const()[name = string("op_1696"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1536]> var_1697_cast_fp16 = reshape(shape = var_1696, x = obj_107_cast_fp16)[name = string("op_1697_cast_fp16")];
+            bool mh_w_45_transpose_x_0 = const()[name = string("mh_w_45_transpose_x_0"), val = bool(true)];
+            bool mh_w_45_transpose_y_0 = const()[name = string("mh_w_45_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 12, 1, 1536]> mh_w_45_cast_fp16 = matmul(transpose_x = mh_w_45_transpose_x_0, transpose_y = mh_w_45_transpose_y_0, x = var_1695_cast_fp16, y = var_1697_cast_fp16)[name = string("mh_w_45_cast_fp16")];
+            tensor<fp16, [1, 12, 1, 1536]> mh_w_47_cast_fp16 = add(x = mh_w_45_cast_fp16, y = var_297_cast_fp16)[name = string("mh_w_47_cast_fp16")];
+            tensor<fp16, [1, 12, 1, 1536]> obj_113_cast_fp16 = softmax(axis = var_1519, x = mh_w_47_cast_fp16)[name = string("obj_113_cast_fp16")];
+            tensor<int32, [4]> var_1706 = const()[name = string("op_1706"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1536]> var_1707_cast_fp16 = reshape(shape = var_1706, x = obj_109_cast_fp16)[name = string("op_1707_cast_fp16")];
+            bool attn_23_transpose_x_0 = const()[name = string("attn_23_transpose_x_0"), val = bool(false)];
+            bool attn_23_transpose_y_0 = const()[name = string("attn_23_transpose_y_0"), val = bool(true)];
+            tensor<fp16, [1, 12, 64, 1]> attn_23_cast_fp16 = matmul(transpose_x = attn_23_transpose_x_0, transpose_y = attn_23_transpose_y_0, x = var_1707_cast_fp16, y = obj_113_cast_fp16)[name = string("attn_23_cast_fp16")];
+            tensor<int32, [4]> var_1710 = const()[name = string("op_1710"), val = tensor<int32, [4]>([1, 768, 1, -1])];
+            tensor<fp16, [1, 768, 1, 1]> input_53_cast_fp16 = reshape(shape = var_1710, x = attn_23_cast_fp16)[name = string("input_53_cast_fp16")];
+            string var_1720_pad_type_0 = const()[name = string("op_1720_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1720_strides_0 = const()[name = string("op_1720_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1720_pad_0 = const()[name = string("op_1720_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1720_dilations_0 = const()[name = string("op_1720_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1720_groups_0 = const()[name = string("op_1720_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_5_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(108949440))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(109244416))))[name = string("layers_5_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_5_encoder_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_5_encoder_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(109244544)))];
+            tensor<fp16, [1, 768, 1, 1]> var_1720_cast_fp16 = conv(bias = layers_5_encoder_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_1720_dilations_0, groups = var_1720_groups_0, pad = var_1720_pad_0, pad_type = var_1720_pad_type_0, strides = var_1720_strides_0, weight = layers_5_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_53_cast_fp16)[name = string("op_1720_cast_fp16")];
+            string var_1726_pad_type_0 = const()[name = string("op_1726_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1726_strides_0 = const()[name = string("op_1726_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1726_pad_0 = const()[name = string("op_1726_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1726_dilations_0 = const()[name = string("op_1726_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1726_groups_0 = const()[name = string("op_1726_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_5_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(109251648))), nonzero_data = tensor<fp16, [2710]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(109246144))))[name = string("layers_5_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1]> var_1726_cast_fp16 = conv(dilations = var_1726_dilations_0, groups = var_1726_groups_0, pad = var_1726_pad_0, pad_type = var_1726_pad_type_0, strides = var_1726_strides_0, weight = layers_5_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_53_cast_fp16)[name = string("op_1726_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> obj_111_cast_fp16 = add(x = var_1720_cast_fp16, y = var_1726_cast_fp16)[name = string("obj_111_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> inputs_35_cast_fp16 = add(x = inputs_33_cast_fp16, y = obj_111_cast_fp16)[name = string("inputs_35_cast_fp16")];
+            tensor<int32, [1]> out_35_axes_0 = const()[name = string("out_35_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_1740_to_fp16 = const()[name = string("op_1740_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> out_35_cast_fp16 = layer_norm(axes = out_35_axes_0, epsilon = var_1740_to_fp16, x = inputs_35_cast_fp16)[name = string("out_35_cast_fp16")];
+            tensor<fp16, [768]> input_55_gamma_0_to_fp16 = const()[name = string("input_55_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(109325440)))];
+            tensor<fp16, [768]> input_55_beta_0_to_fp16 = const()[name = string("input_55_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(109327040)))];
+            fp16 input_55_epsilon_0_to_fp16 = const()[name = string("input_55_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> input_55_cast_fp16 = batch_norm(beta = input_55_beta_0_to_fp16, epsilon = input_55_epsilon_0_to_fp16, gamma = input_55_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_35_cast_fp16)[name = string("input_55_cast_fp16")];
+            string var_1758_pad_type_0 = const()[name = string("op_1758_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1758_strides_0 = const()[name = string("op_1758_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1758_pad_0 = const()[name = string("op_1758_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1758_dilations_0 = const()[name = string("op_1758_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1758_groups_0 = const()[name = string("op_1758_groups_0"), val = int32(1)];
+            tensor<fp16, [3072, 768, 1, 1]> layers_5_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [3072, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(109328640))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(110508352))))[name = string("layers_5_fc1_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [3072]> layers_5_fc1_inlier_module_bias_to_fp16 = const()[name = string("layers_5_fc1_inlier_module_bias_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(110508480)))];
+            tensor<fp16, [1, 3072, 1, 1]> var_1758_cast_fp16 = conv(bias = layers_5_fc1_inlier_module_bias_to_fp16, dilations = var_1758_dilations_0, groups = var_1758_groups_0, pad = var_1758_pad_0, pad_type = var_1758_pad_type_0, strides = var_1758_strides_0, weight = layers_5_fc1_inlier_module_weight_to_fp16_palettized, x = input_55_cast_fp16)[name = string("op_1758_cast_fp16")];
+            string var_1764_pad_type_0 = const()[name = string("op_1764_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1764_strides_0 = const()[name = string("op_1764_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1764_pad_0 = const()[name = string("op_1764_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1764_dilations_0 = const()[name = string("op_1764_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1764_groups_0 = const()[name = string("op_1764_groups_0"), val = int32(1)];
+            tensor<fp16, [3072, 768, 1, 1]> layers_5_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [3072, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(110534016))), nonzero_data = tensor<fp16, [9617]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(110514688))))[name = string("layers_5_fc1_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 3072, 1, 1]> var_1764_cast_fp16 = conv(dilations = var_1764_dilations_0, groups = var_1764_groups_0, pad = var_1764_pad_0, pad_type = var_1764_pad_type_0, strides = var_1764_strides_0, weight = layers_5_fc1_outlier_module_weight_to_fp16_sparsified, x = input_55_cast_fp16)[name = string("op_1764_cast_fp16")];
+            tensor<fp16, [1, 3072, 1, 1]> input_57_cast_fp16 = add(x = var_1758_cast_fp16, y = var_1764_cast_fp16)[name = string("input_57_cast_fp16")];
+            string input_59_mode_0 = const()[name = string("input_59_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 3072, 1, 1]> input_59_cast_fp16 = gelu(mode = input_59_mode_0, x = input_57_cast_fp16)[name = string("input_59_cast_fp16")];
+            string var_1775_pad_type_0 = const()[name = string("op_1775_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1775_strides_0 = const()[name = string("op_1775_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1775_pad_0 = const()[name = string("op_1775_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1775_dilations_0 = const()[name = string("op_1775_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1775_groups_0 = const()[name = string("op_1775_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 3072, 1, 1]> layers_5_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(110828992))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(112008704))))[name = string("layers_5_fc2_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_5_fc2_inlier_module_bias_to_fp16 = const()[name = string("layers_5_fc2_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(112008832)))];
+            tensor<fp16, [1, 768, 1, 1]> var_1775_cast_fp16 = conv(bias = layers_5_fc2_inlier_module_bias_to_fp16, dilations = var_1775_dilations_0, groups = var_1775_groups_0, pad = var_1775_pad_0, pad_type = var_1775_pad_type_0, strides = var_1775_strides_0, weight = layers_5_fc2_inlier_module_weight_to_fp16_palettized, x = input_59_cast_fp16)[name = string("op_1775_cast_fp16")];
+            string var_1781_pad_type_0 = const()[name = string("op_1781_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1781_strides_0 = const()[name = string("op_1781_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1781_pad_0 = const()[name = string("op_1781_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1781_dilations_0 = const()[name = string("op_1781_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1781_groups_0 = const()[name = string("op_1781_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 3072, 1, 1]> layers_5_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(112033024))), nonzero_data = tensor<fp16, [11249]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(112010432))))[name = string("layers_5_fc2_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1]> var_1781_cast_fp16 = conv(dilations = var_1781_dilations_0, groups = var_1781_groups_0, pad = var_1781_pad_0, pad_type = var_1781_pad_type_0, strides = var_1781_strides_0, weight = layers_5_fc2_outlier_module_weight_to_fp16_sparsified, x = input_59_cast_fp16)[name = string("op_1781_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> hidden_states_13_cast_fp16 = add(x = var_1775_cast_fp16, y = var_1781_cast_fp16)[name = string("hidden_states_13_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> inputs_37_cast_fp16 = add(x = inputs_35_cast_fp16, y = hidden_states_13_cast_fp16)[name = string("inputs_37_cast_fp16")];
+            tensor<int32, [4]> obj_125_begin_0 = const()[name = string("obj_125_begin_0"), val = tensor<int32, [4]>([6, 0, 0, 0])];
+            tensor<int32, [4]> obj_125_end_0 = const()[name = string("obj_125_end_0"), val = tensor<int32, [4]>([7, 768, 1, 1536])];
+            tensor<bool, [4]> obj_125_end_mask_0 = const()[name = string("obj_125_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<fp16, [1, 768, 1, 1536]> obj_125_cast_fp16 = slice_by_index(begin = obj_125_begin_0, end = obj_125_end_0, end_mask = obj_125_end_mask_0, x = read_state_2)[name = string("obj_125_cast_fp16")];
+            tensor<int32, [4]> obj_127_begin_0 = const()[name = string("obj_127_begin_0"), val = tensor<int32, [4]>([6, 0, 0, 0])];
+            tensor<int32, [4]> obj_127_end_0 = const()[name = string("obj_127_end_0"), val = tensor<int32, [4]>([7, 768, 1, 1536])];
+            tensor<bool, [4]> obj_127_end_mask_0 = const()[name = string("obj_127_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<fp16, [1, 768, 1, 1536]> obj_127_cast_fp16 = slice_by_index(begin = obj_127_begin_0, end = obj_127_end_0, end_mask = obj_127_end_mask_0, x = read_state_3)[name = string("obj_127_cast_fp16")];
+            int32 var_1804 = const()[name = string("op_1804"), val = int32(3)];
+            tensor<int32, [1]> out_37_axes_0 = const()[name = string("out_37_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_1829_to_fp16 = const()[name = string("op_1829_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> out_37_cast_fp16 = layer_norm(axes = out_37_axes_0, epsilon = var_1829_to_fp16, x = inputs_37_cast_fp16)[name = string("out_37_cast_fp16")];
+            tensor<fp16, [768]> obj_115_gamma_0_to_fp16 = const()[name = string("obj_115_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(112328000)))];
+            tensor<fp16, [768]> obj_115_beta_0_to_fp16 = const()[name = string("obj_115_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(112329600)))];
+            fp16 obj_115_epsilon_0_to_fp16 = const()[name = string("obj_115_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> obj_115_cast_fp16 = batch_norm(beta = obj_115_beta_0_to_fp16, epsilon = obj_115_epsilon_0_to_fp16, gamma = obj_115_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_37_cast_fp16)[name = string("obj_115_cast_fp16")];
+            string var_1851_pad_type_0 = const()[name = string("op_1851_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1851_strides_0 = const()[name = string("op_1851_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1851_pad_0 = const()[name = string("op_1851_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1851_dilations_0 = const()[name = string("op_1851_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1851_groups_0 = const()[name = string("op_1851_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_6_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(112331200))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(112626176))))[name = string("layers_6_self_attn_q_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_6_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_6_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(112626304)))];
+            tensor<fp16, [1, 768, 1, 1]> var_1851_cast_fp16 = conv(bias = layers_6_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_1851_dilations_0, groups = var_1851_groups_0, pad = var_1851_pad_0, pad_type = var_1851_pad_type_0, strides = var_1851_strides_0, weight = layers_6_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_115_cast_fp16)[name = string("op_1851_cast_fp16")];
+            string var_1857_pad_type_0 = const()[name = string("op_1857_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1857_strides_0 = const()[name = string("op_1857_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1857_pad_0 = const()[name = string("op_1857_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1857_dilations_0 = const()[name = string("op_1857_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1857_groups_0 = const()[name = string("op_1857_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_6_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(112636224))), nonzero_data = tensor<fp16, [4112]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(112627904))))[name = string("layers_6_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1]> var_1857_cast_fp16 = conv(dilations = var_1857_dilations_0, groups = var_1857_groups_0, pad = var_1857_pad_0, pad_type = var_1857_pad_type_0, strides = var_1857_strides_0, weight = layers_6_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_115_cast_fp16)[name = string("op_1857_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> query_25_cast_fp16 = add(x = var_1851_cast_fp16, y = var_1857_cast_fp16)[name = string("query_25_cast_fp16")];
+            string var_1866_pad_type_0 = const()[name = string("op_1866_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1866_strides_0 = const()[name = string("op_1866_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1866_pad_0 = const()[name = string("op_1866_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1866_dilations_0 = const()[name = string("op_1866_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1866_groups_0 = const()[name = string("op_1866_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_6_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(112710016))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(113004992))))[name = string("layers_6_self_attn_k_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1, 768, 1, 1]> var_1866_cast_fp16 = conv(dilations = var_1866_dilations_0, groups = var_1866_groups_0, pad = var_1866_pad_0, pad_type = var_1866_pad_type_0, strides = var_1866_strides_0, weight = layers_6_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_115_cast_fp16)[name = string("op_1866_cast_fp16")];
+            string var_1872_pad_type_0 = const()[name = string("op_1872_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1872_strides_0 = const()[name = string("op_1872_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1872_pad_0 = const()[name = string("op_1872_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1872_dilations_0 = const()[name = string("op_1872_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1872_groups_0 = const()[name = string("op_1872_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_6_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(113013632))), nonzero_data = tensor<fp16, [4194]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(113005120))))[name = string("layers_6_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1]> var_1872_cast_fp16 = conv(dilations = var_1872_dilations_0, groups = var_1872_groups_0, pad = var_1872_pad_0, pad_type = var_1872_pad_type_0, strides = var_1872_strides_0, weight = layers_6_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_115_cast_fp16)[name = string("op_1872_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> current_key_13_cast_fp16 = add(x = var_1866_cast_fp16, y = var_1872_cast_fp16)[name = string("current_key_13_cast_fp16")];
+            string var_1882_pad_type_0 = const()[name = string("op_1882_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1882_strides_0 = const()[name = string("op_1882_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1882_pad_0 = const()[name = string("op_1882_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1882_dilations_0 = const()[name = string("op_1882_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1882_groups_0 = const()[name = string("op_1882_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_6_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(113087424))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(113382400))))[name = string("layers_6_self_attn_v_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_6_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_6_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(113382528)))];
+            tensor<fp16, [1, 768, 1, 1]> var_1882_cast_fp16 = conv(bias = layers_6_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_1882_dilations_0, groups = var_1882_groups_0, pad = var_1882_pad_0, pad_type = var_1882_pad_type_0, strides = var_1882_strides_0, weight = layers_6_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_115_cast_fp16)[name = string("op_1882_cast_fp16")];
+            string var_1888_pad_type_0 = const()[name = string("op_1888_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1888_strides_0 = const()[name = string("op_1888_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1888_pad_0 = const()[name = string("op_1888_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1888_dilations_0 = const()[name = string("op_1888_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1888_groups_0 = const()[name = string("op_1888_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_6_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(113389824))), nonzero_data = tensor<fp16, [2798]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(113384128))))[name = string("layers_6_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1]> var_1888_cast_fp16 = conv(dilations = var_1888_dilations_0, groups = var_1888_groups_0, pad = var_1888_pad_0, pad_type = var_1888_pad_type_0, strides = var_1888_strides_0, weight = layers_6_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_115_cast_fp16)[name = string("op_1888_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> current_value_13_cast_fp16 = add(x = var_1882_cast_fp16, y = var_1888_cast_fp16)[name = string("current_value_13_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 448]> var_1894_cast_fp16 = mul(x = current_key_13_cast_fp16, y = var_202_cast_fp16)[name = string("op_1894_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 448]> key_13_cast_fp16 = add(x = var_71_cast_fp16_6, y = var_1894_cast_fp16)[name = string("key_13_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 448]> var_1896_cast_fp16 = mul(x = current_value_13_cast_fp16, y = var_202_cast_fp16)[name = string("op_1896_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 448]> value_13_cast_fp16 = add(x = var_86_cast_fp16_6, y = var_1896_cast_fp16)[name = string("value_13_cast_fp16")];
+            tensor<int32, [4]> var_1899 = const()[name = string("op_1899"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1]> mh_q_25_cast_fp16 = reshape(shape = var_1899, x = query_25_cast_fp16)[name = string("mh_q_25_cast_fp16")];
+            fp16 var_1901_to_fp16 = const()[name = string("op_1901_to_fp16"), val = fp16(0x1p-3)];
+            tensor<fp16, [1, 12, 64, 1]> var_1902_cast_fp16 = mul(x = mh_q_25_cast_fp16, y = var_1901_to_fp16)[name = string("op_1902_cast_fp16")];
+            tensor<int32, [4]> var_1903 = const()[name = string("op_1903"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 448]> var_1904_cast_fp16 = reshape(shape = var_1903, x = key_13_cast_fp16)[name = string("op_1904_cast_fp16")];
+            bool mh_w_49_transpose_x_0 = const()[name = string("mh_w_49_transpose_x_0"), val = bool(true)];
+            bool mh_w_49_transpose_y_0 = const()[name = string("mh_w_49_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 12, 1, 448]> mh_w_49_cast_fp16 = matmul(transpose_x = mh_w_49_transpose_x_0, transpose_y = mh_w_49_transpose_y_0, x = var_1902_cast_fp16, y = var_1904_cast_fp16)[name = string("mh_w_49_cast_fp16")];
+            tensor<fp16, [1, 12, 1, 448]> mh_w_51_cast_fp16 = add(x = mh_w_49_cast_fp16, y = var_219_cast_fp16)[name = string("mh_w_51_cast_fp16")];
+            tensor<fp16, [1, 12, 1, 448]> var_1912_cast_fp16 = softmax(axis = var_1804, x = mh_w_51_cast_fp16)[name = string("op_1912_cast_fp16")];
+            tensor<int32, [4]> var_1913 = const()[name = string("op_1913"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 448]> var_1914_cast_fp16 = reshape(shape = var_1913, x = value_13_cast_fp16)[name = string("op_1914_cast_fp16")];
+            bool attn_25_transpose_x_0 = const()[name = string("attn_25_transpose_x_0"), val = bool(false)];
+            bool attn_25_transpose_y_0 = const()[name = string("attn_25_transpose_y_0"), val = bool(true)];
+            tensor<fp16, [1, 12, 64, 1]> attn_25_cast_fp16 = matmul(transpose_x = attn_25_transpose_x_0, transpose_y = attn_25_transpose_y_0, x = var_1914_cast_fp16, y = var_1912_cast_fp16)[name = string("attn_25_cast_fp16")];
+            tensor<int32, [4]> var_1917 = const()[name = string("op_1917"), val = tensor<int32, [4]>([1, 768, 1, -1])];
+            tensor<fp16, [1, 768, 1, 1]> input_61_cast_fp16 = reshape(shape = var_1917, x = attn_25_cast_fp16)[name = string("input_61_cast_fp16")];
+            string var_1927_pad_type_0 = const()[name = string("op_1927_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1927_strides_0 = const()[name = string("op_1927_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1927_pad_0 = const()[name = string("op_1927_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1927_dilations_0 = const()[name = string("op_1927_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1927_groups_0 = const()[name = string("op_1927_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_6_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(113463616))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(113758592))))[name = string("layers_6_self_attn_o_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_6_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_6_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(113758720)))];
+            tensor<fp16, [1, 768, 1, 1]> var_1927_cast_fp16 = conv(bias = layers_6_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_1927_dilations_0, groups = var_1927_groups_0, pad = var_1927_pad_0, pad_type = var_1927_pad_type_0, strides = var_1927_strides_0, weight = layers_6_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_61_cast_fp16)[name = string("op_1927_cast_fp16")];
+            string var_1933_pad_type_0 = const()[name = string("op_1933_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1933_strides_0 = const()[name = string("op_1933_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1933_pad_0 = const()[name = string("op_1933_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1933_dilations_0 = const()[name = string("op_1933_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1933_groups_0 = const()[name = string("op_1933_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_6_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(113766464))), nonzero_data = tensor<fp16, [3021]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(113760320))))[name = string("layers_6_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1]> var_1933_cast_fp16 = conv(dilations = var_1933_dilations_0, groups = var_1933_groups_0, pad = var_1933_pad_0, pad_type = var_1933_pad_type_0, strides = var_1933_strides_0, weight = layers_6_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_61_cast_fp16)[name = string("op_1933_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> obj_121_cast_fp16 = add(x = var_1927_cast_fp16, y = var_1933_cast_fp16)[name = string("obj_121_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> inputs_39_cast_fp16 = add(x = inputs_37_cast_fp16, y = obj_121_cast_fp16)[name = string("inputs_39_cast_fp16")];
+            tensor<int32, [1]> out_39_axes_0 = const()[name = string("out_39_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_1948_to_fp16 = const()[name = string("op_1948_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> out_39_cast_fp16 = layer_norm(axes = out_39_axes_0, epsilon = var_1948_to_fp16, x = inputs_39_cast_fp16)[name = string("out_39_cast_fp16")];
+            tensor<fp16, [768]> obj_123_gamma_0_to_fp16 = const()[name = string("obj_123_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(113840256)))];
+            tensor<fp16, [768]> obj_123_beta_0_to_fp16 = const()[name = string("obj_123_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(113841856)))];
+            fp16 obj_123_epsilon_0_to_fp16 = const()[name = string("obj_123_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> obj_123_cast_fp16 = batch_norm(beta = obj_123_beta_0_to_fp16, epsilon = obj_123_epsilon_0_to_fp16, gamma = obj_123_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_39_cast_fp16)[name = string("obj_123_cast_fp16")];
+            string var_1968_pad_type_0 = const()[name = string("op_1968_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1968_strides_0 = const()[name = string("op_1968_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1968_pad_0 = const()[name = string("op_1968_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1968_dilations_0 = const()[name = string("op_1968_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1968_groups_0 = const()[name = string("op_1968_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_6_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(113843456))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(114138432))))[name = string("layers_6_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_6_encoder_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_6_encoder_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(114138560)))];
+            tensor<fp16, [1, 768, 1, 1]> var_1968_cast_fp16 = conv(bias = layers_6_encoder_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_1968_dilations_0, groups = var_1968_groups_0, pad = var_1968_pad_0, pad_type = var_1968_pad_type_0, strides = var_1968_strides_0, weight = layers_6_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_123_cast_fp16)[name = string("op_1968_cast_fp16")];
+            string var_1974_pad_type_0 = const()[name = string("op_1974_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_1974_strides_0 = const()[name = string("op_1974_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_1974_pad_0 = const()[name = string("op_1974_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_1974_dilations_0 = const()[name = string("op_1974_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_1974_groups_0 = const()[name = string("op_1974_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_6_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(114145088))), nonzero_data = tensor<fp16, [2414]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(114140160))))[name = string("layers_6_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1]> var_1974_cast_fp16 = conv(dilations = var_1974_dilations_0, groups = var_1974_groups_0, pad = var_1974_pad_0, pad_type = var_1974_pad_type_0, strides = var_1974_strides_0, weight = layers_6_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_123_cast_fp16)[name = string("op_1974_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> query_27_cast_fp16 = add(x = var_1968_cast_fp16, y = var_1974_cast_fp16)[name = string("query_27_cast_fp16")];
+            tensor<int32, [4]> var_1977 = const()[name = string("op_1977"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1]> mh_q_27_cast_fp16 = reshape(shape = var_1977, x = query_27_cast_fp16)[name = string("mh_q_27_cast_fp16")];
+            fp16 var_1979_to_fp16 = const()[name = string("op_1979_to_fp16"), val = fp16(0x1p-3)];
+            tensor<fp16, [1, 12, 64, 1]> var_1980_cast_fp16 = mul(x = mh_q_27_cast_fp16, y = var_1979_to_fp16)[name = string("op_1980_cast_fp16")];
+            tensor<int32, [4]> var_1981 = const()[name = string("op_1981"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1536]> var_1982_cast_fp16 = reshape(shape = var_1981, x = obj_125_cast_fp16)[name = string("op_1982_cast_fp16")];
+            bool mh_w_53_transpose_x_0 = const()[name = string("mh_w_53_transpose_x_0"), val = bool(true)];
+            bool mh_w_53_transpose_y_0 = const()[name = string("mh_w_53_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 12, 1, 1536]> mh_w_53_cast_fp16 = matmul(transpose_x = mh_w_53_transpose_x_0, transpose_y = mh_w_53_transpose_y_0, x = var_1980_cast_fp16, y = var_1982_cast_fp16)[name = string("mh_w_53_cast_fp16")];
+            tensor<fp16, [1, 12, 1, 1536]> mh_w_55_cast_fp16 = add(x = mh_w_53_cast_fp16, y = var_297_cast_fp16)[name = string("mh_w_55_cast_fp16")];
+            tensor<fp16, [1, 12, 1, 1536]> obj_131_cast_fp16 = softmax(axis = var_1804, x = mh_w_55_cast_fp16)[name = string("obj_131_cast_fp16")];
+            tensor<int32, [4]> var_1991 = const()[name = string("op_1991"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1536]> var_1992_cast_fp16 = reshape(shape = var_1991, x = obj_127_cast_fp16)[name = string("op_1992_cast_fp16")];
+            bool attn_27_transpose_x_0 = const()[name = string("attn_27_transpose_x_0"), val = bool(false)];
+            bool attn_27_transpose_y_0 = const()[name = string("attn_27_transpose_y_0"), val = bool(true)];
+            tensor<fp16, [1, 12, 64, 1]> attn_27_cast_fp16 = matmul(transpose_x = attn_27_transpose_x_0, transpose_y = attn_27_transpose_y_0, x = var_1992_cast_fp16, y = obj_131_cast_fp16)[name = string("attn_27_cast_fp16")];
+            tensor<int32, [4]> var_1995 = const()[name = string("op_1995"), val = tensor<int32, [4]>([1, 768, 1, -1])];
+            tensor<fp16, [1, 768, 1, 1]> input_63_cast_fp16 = reshape(shape = var_1995, x = attn_27_cast_fp16)[name = string("input_63_cast_fp16")];
+            string var_2005_pad_type_0 = const()[name = string("op_2005_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2005_strides_0 = const()[name = string("op_2005_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2005_pad_0 = const()[name = string("op_2005_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2005_dilations_0 = const()[name = string("op_2005_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2005_groups_0 = const()[name = string("op_2005_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_6_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(114218880))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(114513856))))[name = string("layers_6_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_6_encoder_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_6_encoder_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(114513984)))];
+            tensor<fp16, [1, 768, 1, 1]> var_2005_cast_fp16 = conv(bias = layers_6_encoder_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_2005_dilations_0, groups = var_2005_groups_0, pad = var_2005_pad_0, pad_type = var_2005_pad_type_0, strides = var_2005_strides_0, weight = layers_6_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_63_cast_fp16)[name = string("op_2005_cast_fp16")];
+            string var_2011_pad_type_0 = const()[name = string("op_2011_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2011_strides_0 = const()[name = string("op_2011_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2011_pad_0 = const()[name = string("op_2011_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2011_dilations_0 = const()[name = string("op_2011_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2011_groups_0 = const()[name = string("op_2011_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_6_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(114521856))), nonzero_data = tensor<fp16, [3078]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(114515584))))[name = string("layers_6_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1]> var_2011_cast_fp16 = conv(dilations = var_2011_dilations_0, groups = var_2011_groups_0, pad = var_2011_pad_0, pad_type = var_2011_pad_type_0, strides = var_2011_strides_0, weight = layers_6_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_63_cast_fp16)[name = string("op_2011_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> obj_129_cast_fp16 = add(x = var_2005_cast_fp16, y = var_2011_cast_fp16)[name = string("obj_129_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> inputs_41_cast_fp16 = add(x = inputs_39_cast_fp16, y = obj_129_cast_fp16)[name = string("inputs_41_cast_fp16")];
+            tensor<int32, [1]> out_41_axes_0 = const()[name = string("out_41_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_2022_to_fp16 = const()[name = string("op_2022_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> out_41_cast_fp16 = layer_norm(axes = out_41_axes_0, epsilon = var_2022_to_fp16, x = inputs_41_cast_fp16)[name = string("out_41_cast_fp16")];
+            tensor<fp16, [768]> input_65_gamma_0_to_fp16 = const()[name = string("input_65_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(114595648)))];
+            tensor<fp16, [768]> input_65_beta_0_to_fp16 = const()[name = string("input_65_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(114597248)))];
+            fp16 input_65_epsilon_0_to_fp16 = const()[name = string("input_65_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> input_65_cast_fp16 = batch_norm(beta = input_65_beta_0_to_fp16, epsilon = input_65_epsilon_0_to_fp16, gamma = input_65_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_41_cast_fp16)[name = string("input_65_cast_fp16")];
+            string var_2040_pad_type_0 = const()[name = string("op_2040_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2040_strides_0 = const()[name = string("op_2040_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2040_pad_0 = const()[name = string("op_2040_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2040_dilations_0 = const()[name = string("op_2040_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2040_groups_0 = const()[name = string("op_2040_groups_0"), val = int32(1)];
+            tensor<fp16, [3072, 768, 1, 1]> layers_6_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [3072, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(114598848))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(115778560))))[name = string("layers_6_fc1_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [3072]> layers_6_fc1_inlier_module_bias_to_fp16 = const()[name = string("layers_6_fc1_inlier_module_bias_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(115778688)))];
+            tensor<fp16, [1, 3072, 1, 1]> var_2040_cast_fp16 = conv(bias = layers_6_fc1_inlier_module_bias_to_fp16, dilations = var_2040_dilations_0, groups = var_2040_groups_0, pad = var_2040_pad_0, pad_type = var_2040_pad_type_0, strides = var_2040_strides_0, weight = layers_6_fc1_inlier_module_weight_to_fp16_palettized, x = input_65_cast_fp16)[name = string("op_2040_cast_fp16")];
+            string var_2046_pad_type_0 = const()[name = string("op_2046_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2046_strides_0 = const()[name = string("op_2046_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2046_pad_0 = const()[name = string("op_2046_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2046_dilations_0 = const()[name = string("op_2046_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2046_groups_0 = const()[name = string("op_2046_groups_0"), val = int32(1)];
+            tensor<fp16, [3072, 768, 1, 1]> layers_6_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [3072, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(115804352))), nonzero_data = tensor<fp16, [9676]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(115784896))))[name = string("layers_6_fc1_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 3072, 1, 1]> var_2046_cast_fp16 = conv(dilations = var_2046_dilations_0, groups = var_2046_groups_0, pad = var_2046_pad_0, pad_type = var_2046_pad_type_0, strides = var_2046_strides_0, weight = layers_6_fc1_outlier_module_weight_to_fp16_sparsified, x = input_65_cast_fp16)[name = string("op_2046_cast_fp16")];
+            tensor<fp16, [1, 3072, 1, 1]> input_67_cast_fp16 = add(x = var_2040_cast_fp16, y = var_2046_cast_fp16)[name = string("input_67_cast_fp16")];
+            string input_69_mode_0 = const()[name = string("input_69_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 3072, 1, 1]> input_69_cast_fp16 = gelu(mode = input_69_mode_0, x = input_67_cast_fp16)[name = string("input_69_cast_fp16")];
+            string var_2057_pad_type_0 = const()[name = string("op_2057_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2057_strides_0 = const()[name = string("op_2057_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2057_pad_0 = const()[name = string("op_2057_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2057_dilations_0 = const()[name = string("op_2057_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2057_groups_0 = const()[name = string("op_2057_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 3072, 1, 1]> layers_6_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(116099328))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(117279040))))[name = string("layers_6_fc2_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_6_fc2_inlier_module_bias_to_fp16 = const()[name = string("layers_6_fc2_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(117279168)))];
+            tensor<fp16, [1, 768, 1, 1]> var_2057_cast_fp16 = conv(bias = layers_6_fc2_inlier_module_bias_to_fp16, dilations = var_2057_dilations_0, groups = var_2057_groups_0, pad = var_2057_pad_0, pad_type = var_2057_pad_type_0, strides = var_2057_strides_0, weight = layers_6_fc2_inlier_module_weight_to_fp16_palettized, x = input_69_cast_fp16)[name = string("op_2057_cast_fp16")];
+            string var_2063_pad_type_0 = const()[name = string("op_2063_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2063_strides_0 = const()[name = string("op_2063_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2063_pad_0 = const()[name = string("op_2063_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2063_dilations_0 = const()[name = string("op_2063_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2063_groups_0 = const()[name = string("op_2063_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 3072, 1, 1]> layers_6_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(117304960))), nonzero_data = tensor<fp16, [12055]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(117280768))))[name = string("layers_6_fc2_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1]> var_2063_cast_fp16 = conv(dilations = var_2063_dilations_0, groups = var_2063_groups_0, pad = var_2063_pad_0, pad_type = var_2063_pad_type_0, strides = var_2063_strides_0, weight = layers_6_fc2_outlier_module_weight_to_fp16_sparsified, x = input_69_cast_fp16)[name = string("op_2063_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> hidden_states_15_cast_fp16 = add(x = var_2057_cast_fp16, y = var_2063_cast_fp16)[name = string("hidden_states_15_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> inputs_43_cast_fp16 = add(x = inputs_41_cast_fp16, y = hidden_states_15_cast_fp16)[name = string("inputs_43_cast_fp16")];
+            tensor<int32, [4]> obj_143_begin_0 = const()[name = string("obj_143_begin_0"), val = tensor<int32, [4]>([7, 0, 0, 0])];
+            tensor<int32, [4]> obj_143_end_0 = const()[name = string("obj_143_end_0"), val = tensor<int32, [4]>([8, 768, 1, 1536])];
+            tensor<bool, [4]> obj_143_end_mask_0 = const()[name = string("obj_143_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<fp16, [1, 768, 1, 1536]> obj_143_cast_fp16 = slice_by_index(begin = obj_143_begin_0, end = obj_143_end_0, end_mask = obj_143_end_mask_0, x = read_state_2)[name = string("obj_143_cast_fp16")];
+            tensor<int32, [4]> obj_145_begin_0 = const()[name = string("obj_145_begin_0"), val = tensor<int32, [4]>([7, 0, 0, 0])];
+            tensor<int32, [4]> obj_145_end_0 = const()[name = string("obj_145_end_0"), val = tensor<int32, [4]>([8, 768, 1, 1536])];
+            tensor<bool, [4]> obj_145_end_mask_0 = const()[name = string("obj_145_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<fp16, [1, 768, 1, 1536]> obj_145_cast_fp16 = slice_by_index(begin = obj_145_begin_0, end = obj_145_end_0, end_mask = obj_145_end_mask_0, x = read_state_3)[name = string("obj_145_cast_fp16")];
+            int32 var_2085 = const()[name = string("op_2085"), val = int32(3)];
+            tensor<int32, [1]> out_43_axes_0 = const()[name = string("out_43_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_2110_to_fp16 = const()[name = string("op_2110_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> out_43_cast_fp16 = layer_norm(axes = out_43_axes_0, epsilon = var_2110_to_fp16, x = inputs_43_cast_fp16)[name = string("out_43_cast_fp16")];
+            tensor<fp16, [768]> obj_133_gamma_0_to_fp16 = const()[name = string("obj_133_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(117599936)))];
+            tensor<fp16, [768]> obj_133_beta_0_to_fp16 = const()[name = string("obj_133_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(117601536)))];
+            fp16 obj_133_epsilon_0_to_fp16 = const()[name = string("obj_133_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> obj_133_cast_fp16 = batch_norm(beta = obj_133_beta_0_to_fp16, epsilon = obj_133_epsilon_0_to_fp16, gamma = obj_133_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_43_cast_fp16)[name = string("obj_133_cast_fp16")];
+            string var_2132_pad_type_0 = const()[name = string("op_2132_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2132_strides_0 = const()[name = string("op_2132_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2132_pad_0 = const()[name = string("op_2132_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2132_dilations_0 = const()[name = string("op_2132_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2132_groups_0 = const()[name = string("op_2132_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_7_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(117603136))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(117898112))))[name = string("layers_7_self_attn_q_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_7_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_7_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(117898240)))];
+            tensor<fp16, [1, 768, 1, 1]> var_2132_cast_fp16 = conv(bias = layers_7_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_2132_dilations_0, groups = var_2132_groups_0, pad = var_2132_pad_0, pad_type = var_2132_pad_type_0, strides = var_2132_strides_0, weight = layers_7_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_133_cast_fp16)[name = string("op_2132_cast_fp16")];
+            string var_2138_pad_type_0 = const()[name = string("op_2138_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2138_strides_0 = const()[name = string("op_2138_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2138_pad_0 = const()[name = string("op_2138_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2138_dilations_0 = const()[name = string("op_2138_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2138_groups_0 = const()[name = string("op_2138_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_7_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(117906816))), nonzero_data = tensor<fp16, [3426]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(117899840))))[name = string("layers_7_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1]> var_2138_cast_fp16 = conv(dilations = var_2138_dilations_0, groups = var_2138_groups_0, pad = var_2138_pad_0, pad_type = var_2138_pad_type_0, strides = var_2138_strides_0, weight = layers_7_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_133_cast_fp16)[name = string("op_2138_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> query_29_cast_fp16 = add(x = var_2132_cast_fp16, y = var_2138_cast_fp16)[name = string("query_29_cast_fp16")];
+            string var_2147_pad_type_0 = const()[name = string("op_2147_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2147_strides_0 = const()[name = string("op_2147_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2147_pad_0 = const()[name = string("op_2147_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2147_dilations_0 = const()[name = string("op_2147_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2147_groups_0 = const()[name = string("op_2147_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_7_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(117980608))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(118275584))))[name = string("layers_7_self_attn_k_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1, 768, 1, 1]> var_2147_cast_fp16 = conv(dilations = var_2147_dilations_0, groups = var_2147_groups_0, pad = var_2147_pad_0, pad_type = var_2147_pad_type_0, strides = var_2147_strides_0, weight = layers_7_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_133_cast_fp16)[name = string("op_2147_cast_fp16")];
+            string var_2153_pad_type_0 = const()[name = string("op_2153_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2153_strides_0 = const()[name = string("op_2153_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2153_pad_0 = const()[name = string("op_2153_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2153_dilations_0 = const()[name = string("op_2153_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2153_groups_0 = const()[name = string("op_2153_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_7_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(118283648))), nonzero_data = tensor<fp16, [3910]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(118275712))))[name = string("layers_7_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1]> var_2153_cast_fp16 = conv(dilations = var_2153_dilations_0, groups = var_2153_groups_0, pad = var_2153_pad_0, pad_type = var_2153_pad_type_0, strides = var_2153_strides_0, weight = layers_7_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_133_cast_fp16)[name = string("op_2153_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> current_key_15_cast_fp16 = add(x = var_2147_cast_fp16, y = var_2153_cast_fp16)[name = string("current_key_15_cast_fp16")];
+            string var_2163_pad_type_0 = const()[name = string("op_2163_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2163_strides_0 = const()[name = string("op_2163_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2163_pad_0 = const()[name = string("op_2163_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2163_dilations_0 = const()[name = string("op_2163_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2163_groups_0 = const()[name = string("op_2163_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_7_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(118357440))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(118652416))))[name = string("layers_7_self_attn_v_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_7_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_7_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(118652544)))];
+            tensor<fp16, [1, 768, 1, 1]> var_2163_cast_fp16 = conv(bias = layers_7_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_2163_dilations_0, groups = var_2163_groups_0, pad = var_2163_pad_0, pad_type = var_2163_pad_type_0, strides = var_2163_strides_0, weight = layers_7_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_133_cast_fp16)[name = string("op_2163_cast_fp16")];
+            string var_2169_pad_type_0 = const()[name = string("op_2169_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2169_strides_0 = const()[name = string("op_2169_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2169_pad_0 = const()[name = string("op_2169_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2169_dilations_0 = const()[name = string("op_2169_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2169_groups_0 = const()[name = string("op_2169_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_7_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(118660608))), nonzero_data = tensor<fp16, [3196]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(118654144))))[name = string("layers_7_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1]> var_2169_cast_fp16 = conv(dilations = var_2169_dilations_0, groups = var_2169_groups_0, pad = var_2169_pad_0, pad_type = var_2169_pad_type_0, strides = var_2169_strides_0, weight = layers_7_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_133_cast_fp16)[name = string("op_2169_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> current_value_15_cast_fp16 = add(x = var_2163_cast_fp16, y = var_2169_cast_fp16)[name = string("current_value_15_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 448]> var_2175_cast_fp16 = mul(x = current_key_15_cast_fp16, y = var_202_cast_fp16)[name = string("op_2175_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 448]> key_15_cast_fp16 = add(x = var_71_cast_fp16_7, y = var_2175_cast_fp16)[name = string("key_15_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 448]> var_2177_cast_fp16 = mul(x = current_value_15_cast_fp16, y = var_202_cast_fp16)[name = string("op_2177_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 448]> value_15_cast_fp16 = add(x = var_86_cast_fp16_7, y = var_2177_cast_fp16)[name = string("value_15_cast_fp16")];
+            tensor<int32, [4]> var_2180 = const()[name = string("op_2180"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1]> mh_q_29_cast_fp16 = reshape(shape = var_2180, x = query_29_cast_fp16)[name = string("mh_q_29_cast_fp16")];
+            fp16 var_2182_to_fp16 = const()[name = string("op_2182_to_fp16"), val = fp16(0x1p-3)];
+            tensor<fp16, [1, 12, 64, 1]> var_2183_cast_fp16 = mul(x = mh_q_29_cast_fp16, y = var_2182_to_fp16)[name = string("op_2183_cast_fp16")];
+            tensor<int32, [4]> var_2184 = const()[name = string("op_2184"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 448]> var_2185_cast_fp16 = reshape(shape = var_2184, x = key_15_cast_fp16)[name = string("op_2185_cast_fp16")];
+            bool mh_w_57_transpose_x_0 = const()[name = string("mh_w_57_transpose_x_0"), val = bool(true)];
+            bool mh_w_57_transpose_y_0 = const()[name = string("mh_w_57_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 12, 1, 448]> mh_w_57_cast_fp16 = matmul(transpose_x = mh_w_57_transpose_x_0, transpose_y = mh_w_57_transpose_y_0, x = var_2183_cast_fp16, y = var_2185_cast_fp16)[name = string("mh_w_57_cast_fp16")];
+            tensor<fp16, [1, 12, 1, 448]> mh_w_59_cast_fp16 = add(x = mh_w_57_cast_fp16, y = var_219_cast_fp16)[name = string("mh_w_59_cast_fp16")];
+            tensor<fp16, [1, 12, 1, 448]> var_2193_cast_fp16 = softmax(axis = var_2085, x = mh_w_59_cast_fp16)[name = string("op_2193_cast_fp16")];
+            tensor<int32, [4]> var_2194 = const()[name = string("op_2194"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 448]> var_2195_cast_fp16 = reshape(shape = var_2194, x = value_15_cast_fp16)[name = string("op_2195_cast_fp16")];
+            bool attn_29_transpose_x_0 = const()[name = string("attn_29_transpose_x_0"), val = bool(false)];
+            bool attn_29_transpose_y_0 = const()[name = string("attn_29_transpose_y_0"), val = bool(true)];
+            tensor<fp16, [1, 12, 64, 1]> attn_29_cast_fp16 = matmul(transpose_x = attn_29_transpose_x_0, transpose_y = attn_29_transpose_y_0, x = var_2195_cast_fp16, y = var_2193_cast_fp16)[name = string("attn_29_cast_fp16")];
+            tensor<int32, [4]> var_2198 = const()[name = string("op_2198"), val = tensor<int32, [4]>([1, 768, 1, -1])];
+            tensor<fp16, [1, 768, 1, 1]> input_71_cast_fp16 = reshape(shape = var_2198, x = attn_29_cast_fp16)[name = string("input_71_cast_fp16")];
+            string var_2208_pad_type_0 = const()[name = string("op_2208_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2208_strides_0 = const()[name = string("op_2208_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2208_pad_0 = const()[name = string("op_2208_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2208_dilations_0 = const()[name = string("op_2208_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2208_groups_0 = const()[name = string("op_2208_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_7_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(118734400))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(119029376))))[name = string("layers_7_self_attn_o_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_7_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_7_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(119029504)))];
+            tensor<fp16, [1, 768, 1, 1]> var_2208_cast_fp16 = conv(bias = layers_7_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_2208_dilations_0, groups = var_2208_groups_0, pad = var_2208_pad_0, pad_type = var_2208_pad_type_0, strides = var_2208_strides_0, weight = layers_7_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_71_cast_fp16)[name = string("op_2208_cast_fp16")];
+            string var_2214_pad_type_0 = const()[name = string("op_2214_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2214_strides_0 = const()[name = string("op_2214_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2214_pad_0 = const()[name = string("op_2214_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2214_dilations_0 = const()[name = string("op_2214_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2214_groups_0 = const()[name = string("op_2214_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_7_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(119036928))), nonzero_data = tensor<fp16, [2864]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(119031104))))[name = string("layers_7_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1]> var_2214_cast_fp16 = conv(dilations = var_2214_dilations_0, groups = var_2214_groups_0, pad = var_2214_pad_0, pad_type = var_2214_pad_type_0, strides = var_2214_strides_0, weight = layers_7_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_71_cast_fp16)[name = string("op_2214_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> obj_139_cast_fp16 = add(x = var_2208_cast_fp16, y = var_2214_cast_fp16)[name = string("obj_139_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> inputs_45_cast_fp16 = add(x = inputs_43_cast_fp16, y = obj_139_cast_fp16)[name = string("inputs_45_cast_fp16")];
+            tensor<int32, [1]> out_45_axes_0 = const()[name = string("out_45_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_2229_to_fp16 = const()[name = string("op_2229_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> out_45_cast_fp16 = layer_norm(axes = out_45_axes_0, epsilon = var_2229_to_fp16, x = inputs_45_cast_fp16)[name = string("out_45_cast_fp16")];
+            tensor<fp16, [768]> obj_141_gamma_0_to_fp16 = const()[name = string("obj_141_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(119110720)))];
+            tensor<fp16, [768]> obj_141_beta_0_to_fp16 = const()[name = string("obj_141_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(119112320)))];
+            fp16 obj_141_epsilon_0_to_fp16 = const()[name = string("obj_141_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> obj_141_cast_fp16 = batch_norm(beta = obj_141_beta_0_to_fp16, epsilon = obj_141_epsilon_0_to_fp16, gamma = obj_141_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_45_cast_fp16)[name = string("obj_141_cast_fp16")];
+            string var_2249_pad_type_0 = const()[name = string("op_2249_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2249_strides_0 = const()[name = string("op_2249_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2249_pad_0 = const()[name = string("op_2249_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2249_dilations_0 = const()[name = string("op_2249_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2249_groups_0 = const()[name = string("op_2249_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_7_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(119113920))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(119408896))))[name = string("layers_7_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_7_encoder_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_7_encoder_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(119409024)))];
+            tensor<fp16, [1, 768, 1, 1]> var_2249_cast_fp16 = conv(bias = layers_7_encoder_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_2249_dilations_0, groups = var_2249_groups_0, pad = var_2249_pad_0, pad_type = var_2249_pad_type_0, strides = var_2249_strides_0, weight = layers_7_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_141_cast_fp16)[name = string("op_2249_cast_fp16")];
+            string var_2255_pad_type_0 = const()[name = string("op_2255_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2255_strides_0 = const()[name = string("op_2255_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2255_pad_0 = const()[name = string("op_2255_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2255_dilations_0 = const()[name = string("op_2255_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2255_groups_0 = const()[name = string("op_2255_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_7_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(119417088))), nonzero_data = tensor<fp16, [3198]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(119410624))))[name = string("layers_7_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1]> var_2255_cast_fp16 = conv(dilations = var_2255_dilations_0, groups = var_2255_groups_0, pad = var_2255_pad_0, pad_type = var_2255_pad_type_0, strides = var_2255_strides_0, weight = layers_7_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_141_cast_fp16)[name = string("op_2255_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> query_31_cast_fp16 = add(x = var_2249_cast_fp16, y = var_2255_cast_fp16)[name = string("query_31_cast_fp16")];
+            tensor<int32, [4]> var_2258 = const()[name = string("op_2258"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1]> mh_q_31_cast_fp16 = reshape(shape = var_2258, x = query_31_cast_fp16)[name = string("mh_q_31_cast_fp16")];
+            fp16 var_2260_to_fp16 = const()[name = string("op_2260_to_fp16"), val = fp16(0x1p-3)];
+            tensor<fp16, [1, 12, 64, 1]> var_2261_cast_fp16 = mul(x = mh_q_31_cast_fp16, y = var_2260_to_fp16)[name = string("op_2261_cast_fp16")];
+            tensor<int32, [4]> var_2262 = const()[name = string("op_2262"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1536]> var_2263_cast_fp16 = reshape(shape = var_2262, x = obj_143_cast_fp16)[name = string("op_2263_cast_fp16")];
+            bool mh_w_61_transpose_x_0 = const()[name = string("mh_w_61_transpose_x_0"), val = bool(true)];
+            bool mh_w_61_transpose_y_0 = const()[name = string("mh_w_61_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 12, 1, 1536]> mh_w_61_cast_fp16 = matmul(transpose_x = mh_w_61_transpose_x_0, transpose_y = mh_w_61_transpose_y_0, x = var_2261_cast_fp16, y = var_2263_cast_fp16)[name = string("mh_w_61_cast_fp16")];
+            tensor<fp16, [1, 12, 1, 1536]> mh_w_63_cast_fp16 = add(x = mh_w_61_cast_fp16, y = var_297_cast_fp16)[name = string("mh_w_63_cast_fp16")];
+            tensor<fp16, [1, 12, 1, 1536]> obj_149_cast_fp16 = softmax(axis = var_2085, x = mh_w_63_cast_fp16)[name = string("obj_149_cast_fp16")];
+            tensor<int32, [4]> var_2272 = const()[name = string("op_2272"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1536]> var_2273_cast_fp16 = reshape(shape = var_2272, x = obj_145_cast_fp16)[name = string("op_2273_cast_fp16")];
+            bool attn_31_transpose_x_0 = const()[name = string("attn_31_transpose_x_0"), val = bool(false)];
+            bool attn_31_transpose_y_0 = const()[name = string("attn_31_transpose_y_0"), val = bool(true)];
+            tensor<fp16, [1, 12, 64, 1]> attn_31_cast_fp16 = matmul(transpose_x = attn_31_transpose_x_0, transpose_y = attn_31_transpose_y_0, x = var_2273_cast_fp16, y = obj_149_cast_fp16)[name = string("attn_31_cast_fp16")];
+            tensor<int32, [4]> var_2276 = const()[name = string("op_2276"), val = tensor<int32, [4]>([1, 768, 1, -1])];
+            tensor<fp16, [1, 768, 1, 1]> input_73_cast_fp16 = reshape(shape = var_2276, x = attn_31_cast_fp16)[name = string("input_73_cast_fp16")];
+            string var_2286_pad_type_0 = const()[name = string("op_2286_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2286_strides_0 = const()[name = string("op_2286_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2286_pad_0 = const()[name = string("op_2286_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2286_dilations_0 = const()[name = string("op_2286_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2286_groups_0 = const()[name = string("op_2286_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_7_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(119490880))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(119785856))))[name = string("layers_7_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_7_encoder_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_7_encoder_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(119785984)))];
+            tensor<fp16, [1, 768, 1, 1]> var_2286_cast_fp16 = conv(bias = layers_7_encoder_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_2286_dilations_0, groups = var_2286_groups_0, pad = var_2286_pad_0, pad_type = var_2286_pad_type_0, strides = var_2286_strides_0, weight = layers_7_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_73_cast_fp16)[name = string("op_2286_cast_fp16")];
+            string var_2292_pad_type_0 = const()[name = string("op_2292_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2292_strides_0 = const()[name = string("op_2292_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2292_pad_0 = const()[name = string("op_2292_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2292_dilations_0 = const()[name = string("op_2292_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2292_groups_0 = const()[name = string("op_2292_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_7_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(119794240))), nonzero_data = tensor<fp16, [3278]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(119787584))))[name = string("layers_7_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1]> var_2292_cast_fp16 = conv(dilations = var_2292_dilations_0, groups = var_2292_groups_0, pad = var_2292_pad_0, pad_type = var_2292_pad_type_0, strides = var_2292_strides_0, weight = layers_7_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_73_cast_fp16)[name = string("op_2292_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> obj_147_cast_fp16 = add(x = var_2286_cast_fp16, y = var_2292_cast_fp16)[name = string("obj_147_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> inputs_47_cast_fp16 = add(x = inputs_45_cast_fp16, y = obj_147_cast_fp16)[name = string("inputs_47_cast_fp16")];
+            tensor<int32, [1]> out_47_axes_0 = const()[name = string("out_47_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_2303_to_fp16 = const()[name = string("op_2303_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> out_47_cast_fp16 = layer_norm(axes = out_47_axes_0, epsilon = var_2303_to_fp16, x = inputs_47_cast_fp16)[name = string("out_47_cast_fp16")];
+            tensor<fp16, [768]> input_75_gamma_0_to_fp16 = const()[name = string("input_75_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(119868032)))];
+            tensor<fp16, [768]> input_75_beta_0_to_fp16 = const()[name = string("input_75_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(119869632)))];
+            fp16 input_75_epsilon_0_to_fp16 = const()[name = string("input_75_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> input_75_cast_fp16 = batch_norm(beta = input_75_beta_0_to_fp16, epsilon = input_75_epsilon_0_to_fp16, gamma = input_75_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_47_cast_fp16)[name = string("input_75_cast_fp16")];
+            string var_2321_pad_type_0 = const()[name = string("op_2321_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2321_strides_0 = const()[name = string("op_2321_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2321_pad_0 = const()[name = string("op_2321_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2321_dilations_0 = const()[name = string("op_2321_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2321_groups_0 = const()[name = string("op_2321_groups_0"), val = int32(1)];
+            tensor<fp16, [3072, 768, 1, 1]> layers_7_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [3072, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(119871232))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(121050944))))[name = string("layers_7_fc1_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [3072]> layers_7_fc1_inlier_module_bias_to_fp16 = const()[name = string("layers_7_fc1_inlier_module_bias_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(121051072)))];
+            tensor<fp16, [1, 3072, 1, 1]> var_2321_cast_fp16 = conv(bias = layers_7_fc1_inlier_module_bias_to_fp16, dilations = var_2321_dilations_0, groups = var_2321_groups_0, pad = var_2321_pad_0, pad_type = var_2321_pad_type_0, strides = var_2321_strides_0, weight = layers_7_fc1_inlier_module_weight_to_fp16_palettized, x = input_75_cast_fp16)[name = string("op_2321_cast_fp16")];
+            string var_2327_pad_type_0 = const()[name = string("op_2327_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2327_strides_0 = const()[name = string("op_2327_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2327_pad_0 = const()[name = string("op_2327_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2327_dilations_0 = const()[name = string("op_2327_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2327_groups_0 = const()[name = string("op_2327_groups_0"), val = int32(1)];
+            tensor<fp16, [3072, 768, 1, 1]> layers_7_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [3072, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(121076224))), nonzero_data = tensor<fp16, [9421]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(121057280))))[name = string("layers_7_fc1_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 3072, 1, 1]> var_2327_cast_fp16 = conv(dilations = var_2327_dilations_0, groups = var_2327_groups_0, pad = var_2327_pad_0, pad_type = var_2327_pad_type_0, strides = var_2327_strides_0, weight = layers_7_fc1_outlier_module_weight_to_fp16_sparsified, x = input_75_cast_fp16)[name = string("op_2327_cast_fp16")];
+            tensor<fp16, [1, 3072, 1, 1]> input_77_cast_fp16 = add(x = var_2321_cast_fp16, y = var_2327_cast_fp16)[name = string("input_77_cast_fp16")];
+            string input_79_mode_0 = const()[name = string("input_79_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 3072, 1, 1]> input_79_cast_fp16 = gelu(mode = input_79_mode_0, x = input_77_cast_fp16)[name = string("input_79_cast_fp16")];
+            string var_2338_pad_type_0 = const()[name = string("op_2338_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2338_strides_0 = const()[name = string("op_2338_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2338_pad_0 = const()[name = string("op_2338_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2338_dilations_0 = const()[name = string("op_2338_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2338_groups_0 = const()[name = string("op_2338_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 3072, 1, 1]> layers_7_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(121371200))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(122550912))))[name = string("layers_7_fc2_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_7_fc2_inlier_module_bias_to_fp16 = const()[name = string("layers_7_fc2_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(122551040)))];
+            tensor<fp16, [1, 768, 1, 1]> var_2338_cast_fp16 = conv(bias = layers_7_fc2_inlier_module_bias_to_fp16, dilations = var_2338_dilations_0, groups = var_2338_groups_0, pad = var_2338_pad_0, pad_type = var_2338_pad_type_0, strides = var_2338_strides_0, weight = layers_7_fc2_inlier_module_weight_to_fp16_palettized, x = input_79_cast_fp16)[name = string("op_2338_cast_fp16")];
+            string var_2344_pad_type_0 = const()[name = string("op_2344_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2344_strides_0 = const()[name = string("op_2344_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2344_pad_0 = const()[name = string("op_2344_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2344_dilations_0 = const()[name = string("op_2344_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2344_groups_0 = const()[name = string("op_2344_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 3072, 1, 1]> layers_7_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(122576704))), nonzero_data = tensor<fp16, [11993]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(122552640))))[name = string("layers_7_fc2_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1]> var_2344_cast_fp16 = conv(dilations = var_2344_dilations_0, groups = var_2344_groups_0, pad = var_2344_pad_0, pad_type = var_2344_pad_type_0, strides = var_2344_strides_0, weight = layers_7_fc2_outlier_module_weight_to_fp16_sparsified, x = input_79_cast_fp16)[name = string("op_2344_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> hidden_states_17_cast_fp16 = add(x = var_2338_cast_fp16, y = var_2344_cast_fp16)[name = string("hidden_states_17_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> inputs_49_cast_fp16 = add(x = inputs_47_cast_fp16, y = hidden_states_17_cast_fp16)[name = string("inputs_49_cast_fp16")];
+            tensor<int32, [4]> obj_161_begin_0 = const()[name = string("obj_161_begin_0"), val = tensor<int32, [4]>([8, 0, 0, 0])];
+            tensor<int32, [4]> obj_161_end_0 = const()[name = string("obj_161_end_0"), val = tensor<int32, [4]>([9, 768, 1, 1536])];
+            tensor<bool, [4]> obj_161_end_mask_0 = const()[name = string("obj_161_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<fp16, [1, 768, 1, 1536]> obj_161_cast_fp16 = slice_by_index(begin = obj_161_begin_0, end = obj_161_end_0, end_mask = obj_161_end_mask_0, x = read_state_2)[name = string("obj_161_cast_fp16")];
+            tensor<int32, [4]> obj_163_begin_0 = const()[name = string("obj_163_begin_0"), val = tensor<int32, [4]>([8, 0, 0, 0])];
+            tensor<int32, [4]> obj_163_end_0 = const()[name = string("obj_163_end_0"), val = tensor<int32, [4]>([9, 768, 1, 1536])];
+            tensor<bool, [4]> obj_163_end_mask_0 = const()[name = string("obj_163_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<fp16, [1, 768, 1, 1536]> obj_163_cast_fp16 = slice_by_index(begin = obj_163_begin_0, end = obj_163_end_0, end_mask = obj_163_end_mask_0, x = read_state_3)[name = string("obj_163_cast_fp16")];
+            int32 var_2366 = const()[name = string("op_2366"), val = int32(3)];
+            tensor<int32, [1]> out_49_axes_0 = const()[name = string("out_49_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_2391_to_fp16 = const()[name = string("op_2391_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> out_49_cast_fp16 = layer_norm(axes = out_49_axes_0, epsilon = var_2391_to_fp16, x = inputs_49_cast_fp16)[name = string("out_49_cast_fp16")];
+            tensor<fp16, [768]> obj_151_gamma_0_to_fp16 = const()[name = string("obj_151_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(122871680)))];
+            tensor<fp16, [768]> obj_151_beta_0_to_fp16 = const()[name = string("obj_151_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(122873280)))];
+            fp16 obj_151_epsilon_0_to_fp16 = const()[name = string("obj_151_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> obj_151_cast_fp16 = batch_norm(beta = obj_151_beta_0_to_fp16, epsilon = obj_151_epsilon_0_to_fp16, gamma = obj_151_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_49_cast_fp16)[name = string("obj_151_cast_fp16")];
+            string var_2413_pad_type_0 = const()[name = string("op_2413_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2413_strides_0 = const()[name = string("op_2413_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2413_pad_0 = const()[name = string("op_2413_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2413_dilations_0 = const()[name = string("op_2413_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2413_groups_0 = const()[name = string("op_2413_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_8_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(122874880))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(123169856))))[name = string("layers_8_self_attn_q_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_8_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_8_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(123169984)))];
+            tensor<fp16, [1, 768, 1, 1]> var_2413_cast_fp16 = conv(bias = layers_8_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_2413_dilations_0, groups = var_2413_groups_0, pad = var_2413_pad_0, pad_type = var_2413_pad_type_0, strides = var_2413_strides_0, weight = layers_8_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_151_cast_fp16)[name = string("op_2413_cast_fp16")];
+            string var_2419_pad_type_0 = const()[name = string("op_2419_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2419_strides_0 = const()[name = string("op_2419_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2419_pad_0 = const()[name = string("op_2419_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2419_dilations_0 = const()[name = string("op_2419_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2419_groups_0 = const()[name = string("op_2419_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_8_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(123176896))), nonzero_data = tensor<fp16, [2602]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(123171584))))[name = string("layers_8_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1]> var_2419_cast_fp16 = conv(dilations = var_2419_dilations_0, groups = var_2419_groups_0, pad = var_2419_pad_0, pad_type = var_2419_pad_type_0, strides = var_2419_strides_0, weight = layers_8_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_151_cast_fp16)[name = string("op_2419_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> query_33_cast_fp16 = add(x = var_2413_cast_fp16, y = var_2419_cast_fp16)[name = string("query_33_cast_fp16")];
+            string var_2428_pad_type_0 = const()[name = string("op_2428_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2428_strides_0 = const()[name = string("op_2428_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2428_pad_0 = const()[name = string("op_2428_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2428_dilations_0 = const()[name = string("op_2428_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2428_groups_0 = const()[name = string("op_2428_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_8_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(123250688))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(123545664))))[name = string("layers_8_self_attn_k_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1, 768, 1, 1]> var_2428_cast_fp16 = conv(dilations = var_2428_dilations_0, groups = var_2428_groups_0, pad = var_2428_pad_0, pad_type = var_2428_pad_type_0, strides = var_2428_strides_0, weight = layers_8_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_151_cast_fp16)[name = string("op_2428_cast_fp16")];
+            string var_2434_pad_type_0 = const()[name = string("op_2434_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2434_strides_0 = const()[name = string("op_2434_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2434_pad_0 = const()[name = string("op_2434_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2434_dilations_0 = const()[name = string("op_2434_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2434_groups_0 = const()[name = string("op_2434_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_8_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(123551040))), nonzero_data = tensor<fp16, [2569]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(123545792))))[name = string("layers_8_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1]> var_2434_cast_fp16 = conv(dilations = var_2434_dilations_0, groups = var_2434_groups_0, pad = var_2434_pad_0, pad_type = var_2434_pad_type_0, strides = var_2434_strides_0, weight = layers_8_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_151_cast_fp16)[name = string("op_2434_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> current_key_17_cast_fp16 = add(x = var_2428_cast_fp16, y = var_2434_cast_fp16)[name = string("current_key_17_cast_fp16")];
+            string var_2444_pad_type_0 = const()[name = string("op_2444_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2444_strides_0 = const()[name = string("op_2444_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2444_pad_0 = const()[name = string("op_2444_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2444_dilations_0 = const()[name = string("op_2444_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2444_groups_0 = const()[name = string("op_2444_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_8_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(123624832))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(123919808))))[name = string("layers_8_self_attn_v_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_8_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_8_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(123919936)))];
+            tensor<fp16, [1, 768, 1, 1]> var_2444_cast_fp16 = conv(bias = layers_8_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_2444_dilations_0, groups = var_2444_groups_0, pad = var_2444_pad_0, pad_type = var_2444_pad_type_0, strides = var_2444_strides_0, weight = layers_8_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_151_cast_fp16)[name = string("op_2444_cast_fp16")];
+            string var_2450_pad_type_0 = const()[name = string("op_2450_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2450_strides_0 = const()[name = string("op_2450_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2450_pad_0 = const()[name = string("op_2450_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2450_dilations_0 = const()[name = string("op_2450_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2450_groups_0 = const()[name = string("op_2450_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_8_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(123926656))), nonzero_data = tensor<fp16, [2511]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(123921536))))[name = string("layers_8_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1]> var_2450_cast_fp16 = conv(dilations = var_2450_dilations_0, groups = var_2450_groups_0, pad = var_2450_pad_0, pad_type = var_2450_pad_type_0, strides = var_2450_strides_0, weight = layers_8_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_151_cast_fp16)[name = string("op_2450_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> current_value_17_cast_fp16 = add(x = var_2444_cast_fp16, y = var_2450_cast_fp16)[name = string("current_value_17_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 448]> var_2456_cast_fp16 = mul(x = current_key_17_cast_fp16, y = var_202_cast_fp16)[name = string("op_2456_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 448]> key_17_cast_fp16 = add(x = var_71_cast_fp16_8, y = var_2456_cast_fp16)[name = string("key_17_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 448]> var_2458_cast_fp16 = mul(x = current_value_17_cast_fp16, y = var_202_cast_fp16)[name = string("op_2458_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 448]> value_17_cast_fp16 = add(x = var_86_cast_fp16_8, y = var_2458_cast_fp16)[name = string("value_17_cast_fp16")];
+            tensor<int32, [4]> var_2461 = const()[name = string("op_2461"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1]> mh_q_33_cast_fp16 = reshape(shape = var_2461, x = query_33_cast_fp16)[name = string("mh_q_33_cast_fp16")];
+            fp16 var_2463_to_fp16 = const()[name = string("op_2463_to_fp16"), val = fp16(0x1p-3)];
+            tensor<fp16, [1, 12, 64, 1]> var_2464_cast_fp16 = mul(x = mh_q_33_cast_fp16, y = var_2463_to_fp16)[name = string("op_2464_cast_fp16")];
+            tensor<int32, [4]> var_2465 = const()[name = string("op_2465"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 448]> var_2466_cast_fp16 = reshape(shape = var_2465, x = key_17_cast_fp16)[name = string("op_2466_cast_fp16")];
+            bool mh_w_65_transpose_x_0 = const()[name = string("mh_w_65_transpose_x_0"), val = bool(true)];
+            bool mh_w_65_transpose_y_0 = const()[name = string("mh_w_65_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 12, 1, 448]> mh_w_65_cast_fp16 = matmul(transpose_x = mh_w_65_transpose_x_0, transpose_y = mh_w_65_transpose_y_0, x = var_2464_cast_fp16, y = var_2466_cast_fp16)[name = string("mh_w_65_cast_fp16")];
+            tensor<fp16, [1, 12, 1, 448]> mh_w_67_cast_fp16 = add(x = mh_w_65_cast_fp16, y = var_219_cast_fp16)[name = string("mh_w_67_cast_fp16")];
+            tensor<fp16, [1, 12, 1, 448]> var_2474_cast_fp16 = softmax(axis = var_2366, x = mh_w_67_cast_fp16)[name = string("op_2474_cast_fp16")];
+            tensor<int32, [4]> var_2475 = const()[name = string("op_2475"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 448]> var_2476_cast_fp16 = reshape(shape = var_2475, x = value_17_cast_fp16)[name = string("op_2476_cast_fp16")];
+            bool attn_33_transpose_x_0 = const()[name = string("attn_33_transpose_x_0"), val = bool(false)];
+            bool attn_33_transpose_y_0 = const()[name = string("attn_33_transpose_y_0"), val = bool(true)];
+            tensor<fp16, [1, 12, 64, 1]> attn_33_cast_fp16 = matmul(transpose_x = attn_33_transpose_x_0, transpose_y = attn_33_transpose_y_0, x = var_2476_cast_fp16, y = var_2474_cast_fp16)[name = string("attn_33_cast_fp16")];
+            tensor<int32, [4]> var_2479 = const()[name = string("op_2479"), val = tensor<int32, [4]>([1, 768, 1, -1])];
+            tensor<fp16, [1, 768, 1, 1]> input_81_cast_fp16 = reshape(shape = var_2479, x = attn_33_cast_fp16)[name = string("input_81_cast_fp16")];
+            string var_2489_pad_type_0 = const()[name = string("op_2489_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2489_strides_0 = const()[name = string("op_2489_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2489_pad_0 = const()[name = string("op_2489_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2489_dilations_0 = const()[name = string("op_2489_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2489_groups_0 = const()[name = string("op_2489_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_8_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(124000448))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(124295424))))[name = string("layers_8_self_attn_o_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_8_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_8_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(124295552)))];
+            tensor<fp16, [1, 768, 1, 1]> var_2489_cast_fp16 = conv(bias = layers_8_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_2489_dilations_0, groups = var_2489_groups_0, pad = var_2489_pad_0, pad_type = var_2489_pad_type_0, strides = var_2489_strides_0, weight = layers_8_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_81_cast_fp16)[name = string("op_2489_cast_fp16")];
+            string var_2495_pad_type_0 = const()[name = string("op_2495_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2495_strides_0 = const()[name = string("op_2495_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2495_pad_0 = const()[name = string("op_2495_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2495_dilations_0 = const()[name = string("op_2495_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2495_groups_0 = const()[name = string("op_2495_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_8_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(124303104))), nonzero_data = tensor<fp16, [2943]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(124297152))))[name = string("layers_8_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1]> var_2495_cast_fp16 = conv(dilations = var_2495_dilations_0, groups = var_2495_groups_0, pad = var_2495_pad_0, pad_type = var_2495_pad_type_0, strides = var_2495_strides_0, weight = layers_8_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_81_cast_fp16)[name = string("op_2495_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> obj_157_cast_fp16 = add(x = var_2489_cast_fp16, y = var_2495_cast_fp16)[name = string("obj_157_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> inputs_51_cast_fp16 = add(x = inputs_49_cast_fp16, y = obj_157_cast_fp16)[name = string("inputs_51_cast_fp16")];
+            tensor<int32, [1]> out_51_axes_0 = const()[name = string("out_51_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_2510_to_fp16 = const()[name = string("op_2510_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> out_51_cast_fp16 = layer_norm(axes = out_51_axes_0, epsilon = var_2510_to_fp16, x = inputs_51_cast_fp16)[name = string("out_51_cast_fp16")];
+            tensor<fp16, [768]> obj_159_gamma_0_to_fp16 = const()[name = string("obj_159_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(124376896)))];
+            tensor<fp16, [768]> obj_159_beta_0_to_fp16 = const()[name = string("obj_159_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(124378496)))];
+            fp16 obj_159_epsilon_0_to_fp16 = const()[name = string("obj_159_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> obj_159_cast_fp16 = batch_norm(beta = obj_159_beta_0_to_fp16, epsilon = obj_159_epsilon_0_to_fp16, gamma = obj_159_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_51_cast_fp16)[name = string("obj_159_cast_fp16")];
+            string var_2530_pad_type_0 = const()[name = string("op_2530_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2530_strides_0 = const()[name = string("op_2530_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2530_pad_0 = const()[name = string("op_2530_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2530_dilations_0 = const()[name = string("op_2530_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2530_groups_0 = const()[name = string("op_2530_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_8_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(124380096))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(124675072))))[name = string("layers_8_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_8_encoder_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_8_encoder_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(124675200)))];
+            tensor<fp16, [1, 768, 1, 1]> var_2530_cast_fp16 = conv(bias = layers_8_encoder_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_2530_dilations_0, groups = var_2530_groups_0, pad = var_2530_pad_0, pad_type = var_2530_pad_type_0, strides = var_2530_strides_0, weight = layers_8_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_159_cast_fp16)[name = string("op_2530_cast_fp16")];
+            string var_2536_pad_type_0 = const()[name = string("op_2536_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2536_strides_0 = const()[name = string("op_2536_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2536_pad_0 = const()[name = string("op_2536_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2536_dilations_0 = const()[name = string("op_2536_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2536_groups_0 = const()[name = string("op_2536_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_8_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(124682304))), nonzero_data = tensor<fp16, [2697]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(124676800))))[name = string("layers_8_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1]> var_2536_cast_fp16 = conv(dilations = var_2536_dilations_0, groups = var_2536_groups_0, pad = var_2536_pad_0, pad_type = var_2536_pad_type_0, strides = var_2536_strides_0, weight = layers_8_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_159_cast_fp16)[name = string("op_2536_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> query_35_cast_fp16 = add(x = var_2530_cast_fp16, y = var_2536_cast_fp16)[name = string("query_35_cast_fp16")];
+            tensor<int32, [4]> var_2539 = const()[name = string("op_2539"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1]> mh_q_35_cast_fp16 = reshape(shape = var_2539, x = query_35_cast_fp16)[name = string("mh_q_35_cast_fp16")];
+            fp16 var_2541_to_fp16 = const()[name = string("op_2541_to_fp16"), val = fp16(0x1p-3)];
+            tensor<fp16, [1, 12, 64, 1]> var_2542_cast_fp16 = mul(x = mh_q_35_cast_fp16, y = var_2541_to_fp16)[name = string("op_2542_cast_fp16")];
+            tensor<int32, [4]> var_2543 = const()[name = string("op_2543"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1536]> var_2544_cast_fp16 = reshape(shape = var_2543, x = obj_161_cast_fp16)[name = string("op_2544_cast_fp16")];
+            bool mh_w_69_transpose_x_0 = const()[name = string("mh_w_69_transpose_x_0"), val = bool(true)];
+            bool mh_w_69_transpose_y_0 = const()[name = string("mh_w_69_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 12, 1, 1536]> mh_w_69_cast_fp16 = matmul(transpose_x = mh_w_69_transpose_x_0, transpose_y = mh_w_69_transpose_y_0, x = var_2542_cast_fp16, y = var_2544_cast_fp16)[name = string("mh_w_69_cast_fp16")];
+            tensor<fp16, [1, 12, 1, 1536]> mh_w_71_cast_fp16 = add(x = mh_w_69_cast_fp16, y = var_297_cast_fp16)[name = string("mh_w_71_cast_fp16")];
+            tensor<fp16, [1, 12, 1, 1536]> obj_167_cast_fp16 = softmax(axis = var_2366, x = mh_w_71_cast_fp16)[name = string("obj_167_cast_fp16")];
+            tensor<int32, [4]> var_2553 = const()[name = string("op_2553"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1536]> var_2554_cast_fp16 = reshape(shape = var_2553, x = obj_163_cast_fp16)[name = string("op_2554_cast_fp16")];
+            bool attn_35_transpose_x_0 = const()[name = string("attn_35_transpose_x_0"), val = bool(false)];
+            bool attn_35_transpose_y_0 = const()[name = string("attn_35_transpose_y_0"), val = bool(true)];
+            tensor<fp16, [1, 12, 64, 1]> attn_35_cast_fp16 = matmul(transpose_x = attn_35_transpose_x_0, transpose_y = attn_35_transpose_y_0, x = var_2554_cast_fp16, y = obj_167_cast_fp16)[name = string("attn_35_cast_fp16")];
+            tensor<int32, [4]> var_2557 = const()[name = string("op_2557"), val = tensor<int32, [4]>([1, 768, 1, -1])];
+            tensor<fp16, [1, 768, 1, 1]> input_83_cast_fp16 = reshape(shape = var_2557, x = attn_35_cast_fp16)[name = string("input_83_cast_fp16")];
+            string var_2567_pad_type_0 = const()[name = string("op_2567_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2567_strides_0 = const()[name = string("op_2567_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2567_pad_0 = const()[name = string("op_2567_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2567_dilations_0 = const()[name = string("op_2567_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2567_groups_0 = const()[name = string("op_2567_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_8_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(124756096))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(125051072))))[name = string("layers_8_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_8_encoder_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_8_encoder_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(125051200)))];
+            tensor<fp16, [1, 768, 1, 1]> var_2567_cast_fp16 = conv(bias = layers_8_encoder_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_2567_dilations_0, groups = var_2567_groups_0, pad = var_2567_pad_0, pad_type = var_2567_pad_type_0, strides = var_2567_strides_0, weight = layers_8_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_83_cast_fp16)[name = string("op_2567_cast_fp16")];
+            string var_2573_pad_type_0 = const()[name = string("op_2573_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2573_strides_0 = const()[name = string("op_2573_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2573_pad_0 = const()[name = string("op_2573_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2573_dilations_0 = const()[name = string("op_2573_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2573_groups_0 = const()[name = string("op_2573_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_8_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(125057536))), nonzero_data = tensor<fp16, [2329]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(125052800))))[name = string("layers_8_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1]> var_2573_cast_fp16 = conv(dilations = var_2573_dilations_0, groups = var_2573_groups_0, pad = var_2573_pad_0, pad_type = var_2573_pad_type_0, strides = var_2573_strides_0, weight = layers_8_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_83_cast_fp16)[name = string("op_2573_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> obj_165_cast_fp16 = add(x = var_2567_cast_fp16, y = var_2573_cast_fp16)[name = string("obj_165_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> inputs_53_cast_fp16 = add(x = inputs_51_cast_fp16, y = obj_165_cast_fp16)[name = string("inputs_53_cast_fp16")];
+            tensor<int32, [1]> out_53_axes_0 = const()[name = string("out_53_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_2587_to_fp16 = const()[name = string("op_2587_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> out_53_cast_fp16 = layer_norm(axes = out_53_axes_0, epsilon = var_2587_to_fp16, x = inputs_53_cast_fp16)[name = string("out_53_cast_fp16")];
+            tensor<fp16, [768]> input_85_gamma_0_to_fp16 = const()[name = string("input_85_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(125131328)))];
+            tensor<fp16, [768]> input_85_beta_0_to_fp16 = const()[name = string("input_85_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(125132928)))];
+            fp16 input_85_epsilon_0_to_fp16 = const()[name = string("input_85_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> input_85_cast_fp16 = batch_norm(beta = input_85_beta_0_to_fp16, epsilon = input_85_epsilon_0_to_fp16, gamma = input_85_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_53_cast_fp16)[name = string("input_85_cast_fp16")];
+            string var_2605_pad_type_0 = const()[name = string("op_2605_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2605_strides_0 = const()[name = string("op_2605_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2605_pad_0 = const()[name = string("op_2605_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2605_dilations_0 = const()[name = string("op_2605_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2605_groups_0 = const()[name = string("op_2605_groups_0"), val = int32(1)];
+            tensor<fp16, [3072, 768, 1, 1]> layers_8_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [3072, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(125134528))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(126314240))))[name = string("layers_8_fc1_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [3072]> layers_8_fc1_inlier_module_bias_to_fp16 = const()[name = string("layers_8_fc1_inlier_module_bias_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(126314368)))];
+            tensor<fp16, [1, 3072, 1, 1]> var_2605_cast_fp16 = conv(bias = layers_8_fc1_inlier_module_bias_to_fp16, dilations = var_2605_dilations_0, groups = var_2605_groups_0, pad = var_2605_pad_0, pad_type = var_2605_pad_type_0, strides = var_2605_strides_0, weight = layers_8_fc1_inlier_module_weight_to_fp16_palettized, x = input_85_cast_fp16)[name = string("op_2605_cast_fp16")];
+            string var_2611_pad_type_0 = const()[name = string("op_2611_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2611_strides_0 = const()[name = string("op_2611_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2611_pad_0 = const()[name = string("op_2611_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2611_dilations_0 = const()[name = string("op_2611_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2611_groups_0 = const()[name = string("op_2611_groups_0"), val = int32(1)];
+            tensor<fp16, [3072, 768, 1, 1]> layers_8_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [3072, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(126339264))), nonzero_data = tensor<fp16, [9296]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(126320576))))[name = string("layers_8_fc1_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 3072, 1, 1]> var_2611_cast_fp16 = conv(dilations = var_2611_dilations_0, groups = var_2611_groups_0, pad = var_2611_pad_0, pad_type = var_2611_pad_type_0, strides = var_2611_strides_0, weight = layers_8_fc1_outlier_module_weight_to_fp16_sparsified, x = input_85_cast_fp16)[name = string("op_2611_cast_fp16")];
+            tensor<fp16, [1, 3072, 1, 1]> input_87_cast_fp16 = add(x = var_2605_cast_fp16, y = var_2611_cast_fp16)[name = string("input_87_cast_fp16")];
+            string input_89_mode_0 = const()[name = string("input_89_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 3072, 1, 1]> input_89_cast_fp16 = gelu(mode = input_89_mode_0, x = input_87_cast_fp16)[name = string("input_89_cast_fp16")];
+            string var_2622_pad_type_0 = const()[name = string("op_2622_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2622_strides_0 = const()[name = string("op_2622_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2622_pad_0 = const()[name = string("op_2622_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2622_dilations_0 = const()[name = string("op_2622_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2622_groups_0 = const()[name = string("op_2622_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 3072, 1, 1]> layers_8_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(126634240))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(127813952))))[name = string("layers_8_fc2_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_8_fc2_inlier_module_bias_to_fp16 = const()[name = string("layers_8_fc2_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(127814080)))];
+            tensor<fp16, [1, 768, 1, 1]> var_2622_cast_fp16 = conv(bias = layers_8_fc2_inlier_module_bias_to_fp16, dilations = var_2622_dilations_0, groups = var_2622_groups_0, pad = var_2622_pad_0, pad_type = var_2622_pad_type_0, strides = var_2622_strides_0, weight = layers_8_fc2_inlier_module_weight_to_fp16_palettized, x = input_89_cast_fp16)[name = string("op_2622_cast_fp16")];
+            string var_2628_pad_type_0 = const()[name = string("op_2628_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2628_strides_0 = const()[name = string("op_2628_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2628_pad_0 = const()[name = string("op_2628_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2628_dilations_0 = const()[name = string("op_2628_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2628_groups_0 = const()[name = string("op_2628_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 3072, 1, 1]> layers_8_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(127840320))), nonzero_data = tensor<fp16, [12278]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(127815680))))[name = string("layers_8_fc2_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1]> var_2628_cast_fp16 = conv(dilations = var_2628_dilations_0, groups = var_2628_groups_0, pad = var_2628_pad_0, pad_type = var_2628_pad_type_0, strides = var_2628_strides_0, weight = layers_8_fc2_outlier_module_weight_to_fp16_sparsified, x = input_89_cast_fp16)[name = string("op_2628_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> hidden_states_19_cast_fp16 = add(x = var_2622_cast_fp16, y = var_2628_cast_fp16)[name = string("hidden_states_19_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> inputs_55_cast_fp16 = add(x = inputs_53_cast_fp16, y = hidden_states_19_cast_fp16)[name = string("inputs_55_cast_fp16")];
+            tensor<int32, [4]> obj_179_begin_0 = const()[name = string("obj_179_begin_0"), val = tensor<int32, [4]>([9, 0, 0, 0])];
+            tensor<int32, [4]> obj_179_end_0 = const()[name = string("obj_179_end_0"), val = tensor<int32, [4]>([10, 768, 1, 1536])];
+            tensor<bool, [4]> obj_179_end_mask_0 = const()[name = string("obj_179_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<fp16, [1, 768, 1, 1536]> obj_179_cast_fp16 = slice_by_index(begin = obj_179_begin_0, end = obj_179_end_0, end_mask = obj_179_end_mask_0, x = read_state_2)[name = string("obj_179_cast_fp16")];
+            tensor<int32, [4]> obj_181_begin_0 = const()[name = string("obj_181_begin_0"), val = tensor<int32, [4]>([9, 0, 0, 0])];
+            tensor<int32, [4]> obj_181_end_0 = const()[name = string("obj_181_end_0"), val = tensor<int32, [4]>([10, 768, 1, 1536])];
+            tensor<bool, [4]> obj_181_end_mask_0 = const()[name = string("obj_181_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<fp16, [1, 768, 1, 1536]> obj_181_cast_fp16 = slice_by_index(begin = obj_181_begin_0, end = obj_181_end_0, end_mask = obj_181_end_mask_0, x = read_state_3)[name = string("obj_181_cast_fp16")];
+            int32 var_2651 = const()[name = string("op_2651"), val = int32(3)];
+            tensor<int32, [1]> out_55_axes_0 = const()[name = string("out_55_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_2676_to_fp16 = const()[name = string("op_2676_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> out_55_cast_fp16 = layer_norm(axes = out_55_axes_0, epsilon = var_2676_to_fp16, x = inputs_55_cast_fp16)[name = string("out_55_cast_fp16")];
+            tensor<fp16, [768]> obj_169_gamma_0_to_fp16 = const()[name = string("obj_169_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(128135296)))];
+            tensor<fp16, [768]> obj_169_beta_0_to_fp16 = const()[name = string("obj_169_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(128136896)))];
+            fp16 obj_169_epsilon_0_to_fp16 = const()[name = string("obj_169_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> obj_169_cast_fp16 = batch_norm(beta = obj_169_beta_0_to_fp16, epsilon = obj_169_epsilon_0_to_fp16, gamma = obj_169_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_55_cast_fp16)[name = string("obj_169_cast_fp16")];
+            string var_2698_pad_type_0 = const()[name = string("op_2698_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2698_strides_0 = const()[name = string("op_2698_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2698_pad_0 = const()[name = string("op_2698_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2698_dilations_0 = const()[name = string("op_2698_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2698_groups_0 = const()[name = string("op_2698_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_9_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(128138496))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(128433472))))[name = string("layers_9_self_attn_q_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_9_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_9_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(128433600)))];
+            tensor<fp16, [1, 768, 1, 1]> var_2698_cast_fp16 = conv(bias = layers_9_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_2698_dilations_0, groups = var_2698_groups_0, pad = var_2698_pad_0, pad_type = var_2698_pad_type_0, strides = var_2698_strides_0, weight = layers_9_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_169_cast_fp16)[name = string("op_2698_cast_fp16")];
+            string var_2704_pad_type_0 = const()[name = string("op_2704_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2704_strides_0 = const()[name = string("op_2704_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2704_pad_0 = const()[name = string("op_2704_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2704_dilations_0 = const()[name = string("op_2704_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2704_groups_0 = const()[name = string("op_2704_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_9_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(128440768))), nonzero_data = tensor<fp16, [2721]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(128435200))))[name = string("layers_9_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1]> var_2704_cast_fp16 = conv(dilations = var_2704_dilations_0, groups = var_2704_groups_0, pad = var_2704_pad_0, pad_type = var_2704_pad_type_0, strides = var_2704_strides_0, weight = layers_9_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_169_cast_fp16)[name = string("op_2704_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> query_37_cast_fp16 = add(x = var_2698_cast_fp16, y = var_2704_cast_fp16)[name = string("query_37_cast_fp16")];
+            string var_2713_pad_type_0 = const()[name = string("op_2713_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2713_strides_0 = const()[name = string("op_2713_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2713_pad_0 = const()[name = string("op_2713_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2713_dilations_0 = const()[name = string("op_2713_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2713_groups_0 = const()[name = string("op_2713_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_9_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(128514560))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(128809536))))[name = string("layers_9_self_attn_k_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1, 768, 1, 1]> var_2713_cast_fp16 = conv(dilations = var_2713_dilations_0, groups = var_2713_groups_0, pad = var_2713_pad_0, pad_type = var_2713_pad_type_0, strides = var_2713_strides_0, weight = layers_9_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_169_cast_fp16)[name = string("op_2713_cast_fp16")];
+            string var_2719_pad_type_0 = const()[name = string("op_2719_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2719_strides_0 = const()[name = string("op_2719_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2719_pad_0 = const()[name = string("op_2719_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2719_dilations_0 = const()[name = string("op_2719_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2719_groups_0 = const()[name = string("op_2719_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_9_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(128815488))), nonzero_data = tensor<fp16, [2856]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(128809664))))[name = string("layers_9_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1]> var_2719_cast_fp16 = conv(dilations = var_2719_dilations_0, groups = var_2719_groups_0, pad = var_2719_pad_0, pad_type = var_2719_pad_type_0, strides = var_2719_strides_0, weight = layers_9_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_169_cast_fp16)[name = string("op_2719_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> current_key_19_cast_fp16 = add(x = var_2713_cast_fp16, y = var_2719_cast_fp16)[name = string("current_key_19_cast_fp16")];
+            string var_2729_pad_type_0 = const()[name = string("op_2729_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2729_strides_0 = const()[name = string("op_2729_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2729_pad_0 = const()[name = string("op_2729_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2729_dilations_0 = const()[name = string("op_2729_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2729_groups_0 = const()[name = string("op_2729_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_9_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(128889280))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(129184256))))[name = string("layers_9_self_attn_v_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_9_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_9_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(129184384)))];
+            tensor<fp16, [1, 768, 1, 1]> var_2729_cast_fp16 = conv(bias = layers_9_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_2729_dilations_0, groups = var_2729_groups_0, pad = var_2729_pad_0, pad_type = var_2729_pad_type_0, strides = var_2729_strides_0, weight = layers_9_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_169_cast_fp16)[name = string("op_2729_cast_fp16")];
+            string var_2735_pad_type_0 = const()[name = string("op_2735_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2735_strides_0 = const()[name = string("op_2735_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2735_pad_0 = const()[name = string("op_2735_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2735_dilations_0 = const()[name = string("op_2735_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2735_groups_0 = const()[name = string("op_2735_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_9_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(129193664))), nonzero_data = tensor<fp16, [3785]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(129185984))))[name = string("layers_9_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1]> var_2735_cast_fp16 = conv(dilations = var_2735_dilations_0, groups = var_2735_groups_0, pad = var_2735_pad_0, pad_type = var_2735_pad_type_0, strides = var_2735_strides_0, weight = layers_9_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_169_cast_fp16)[name = string("op_2735_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> current_value_19_cast_fp16 = add(x = var_2729_cast_fp16, y = var_2735_cast_fp16)[name = string("current_value_19_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 448]> var_2741_cast_fp16 = mul(x = current_key_19_cast_fp16, y = var_202_cast_fp16)[name = string("op_2741_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 448]> key_19_cast_fp16 = add(x = var_71_cast_fp16_9, y = var_2741_cast_fp16)[name = string("key_19_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 448]> var_2743_cast_fp16 = mul(x = current_value_19_cast_fp16, y = var_202_cast_fp16)[name = string("op_2743_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 448]> value_19_cast_fp16 = add(x = var_86_cast_fp16_9, y = var_2743_cast_fp16)[name = string("value_19_cast_fp16")];
+            tensor<int32, [4]> var_2746 = const()[name = string("op_2746"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1]> mh_q_37_cast_fp16 = reshape(shape = var_2746, x = query_37_cast_fp16)[name = string("mh_q_37_cast_fp16")];
+            fp16 var_2748_to_fp16 = const()[name = string("op_2748_to_fp16"), val = fp16(0x1p-3)];
+            tensor<fp16, [1, 12, 64, 1]> var_2749_cast_fp16 = mul(x = mh_q_37_cast_fp16, y = var_2748_to_fp16)[name = string("op_2749_cast_fp16")];
+            tensor<int32, [4]> var_2750 = const()[name = string("op_2750"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 448]> var_2751_cast_fp16 = reshape(shape = var_2750, x = key_19_cast_fp16)[name = string("op_2751_cast_fp16")];
+            bool mh_w_73_transpose_x_0 = const()[name = string("mh_w_73_transpose_x_0"), val = bool(true)];
+            bool mh_w_73_transpose_y_0 = const()[name = string("mh_w_73_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 12, 1, 448]> mh_w_73_cast_fp16 = matmul(transpose_x = mh_w_73_transpose_x_0, transpose_y = mh_w_73_transpose_y_0, x = var_2749_cast_fp16, y = var_2751_cast_fp16)[name = string("mh_w_73_cast_fp16")];
+            tensor<fp16, [1, 12, 1, 448]> mh_w_75_cast_fp16 = add(x = mh_w_73_cast_fp16, y = var_219_cast_fp16)[name = string("mh_w_75_cast_fp16")];
+            tensor<fp16, [1, 12, 1, 448]> var_2759_cast_fp16 = softmax(axis = var_2651, x = mh_w_75_cast_fp16)[name = string("op_2759_cast_fp16")];
+            tensor<int32, [4]> var_2760 = const()[name = string("op_2760"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 448]> var_2761_cast_fp16 = reshape(shape = var_2760, x = value_19_cast_fp16)[name = string("op_2761_cast_fp16")];
+            bool attn_37_transpose_x_0 = const()[name = string("attn_37_transpose_x_0"), val = bool(false)];
+            bool attn_37_transpose_y_0 = const()[name = string("attn_37_transpose_y_0"), val = bool(true)];
+            tensor<fp16, [1, 12, 64, 1]> attn_37_cast_fp16 = matmul(transpose_x = attn_37_transpose_x_0, transpose_y = attn_37_transpose_y_0, x = var_2761_cast_fp16, y = var_2759_cast_fp16)[name = string("attn_37_cast_fp16")];
+            tensor<int32, [4]> var_2764 = const()[name = string("op_2764"), val = tensor<int32, [4]>([1, 768, 1, -1])];
+            tensor<fp16, [1, 768, 1, 1]> input_91_cast_fp16 = reshape(shape = var_2764, x = attn_37_cast_fp16)[name = string("input_91_cast_fp16")];
+            string var_2774_pad_type_0 = const()[name = string("op_2774_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2774_strides_0 = const()[name = string("op_2774_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2774_pad_0 = const()[name = string("op_2774_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2774_dilations_0 = const()[name = string("op_2774_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2774_groups_0 = const()[name = string("op_2774_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_9_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(129267456))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(129562432))))[name = string("layers_9_self_attn_o_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_9_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_9_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(129562560)))];
+            tensor<fp16, [1, 768, 1, 1]> var_2774_cast_fp16 = conv(bias = layers_9_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_2774_dilations_0, groups = var_2774_groups_0, pad = var_2774_pad_0, pad_type = var_2774_pad_type_0, strides = var_2774_strides_0, weight = layers_9_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_91_cast_fp16)[name = string("op_2774_cast_fp16")];
+            string var_2780_pad_type_0 = const()[name = string("op_2780_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2780_strides_0 = const()[name = string("op_2780_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2780_pad_0 = const()[name = string("op_2780_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2780_dilations_0 = const()[name = string("op_2780_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2780_groups_0 = const()[name = string("op_2780_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_9_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(129571904))), nonzero_data = tensor<fp16, [3827]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(129564160))))[name = string("layers_9_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1]> var_2780_cast_fp16 = conv(dilations = var_2780_dilations_0, groups = var_2780_groups_0, pad = var_2780_pad_0, pad_type = var_2780_pad_type_0, strides = var_2780_strides_0, weight = layers_9_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_91_cast_fp16)[name = string("op_2780_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> obj_175_cast_fp16 = add(x = var_2774_cast_fp16, y = var_2780_cast_fp16)[name = string("obj_175_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> inputs_57_cast_fp16 = add(x = inputs_55_cast_fp16, y = obj_175_cast_fp16)[name = string("inputs_57_cast_fp16")];
+            tensor<int32, [1]> out_57_axes_0 = const()[name = string("out_57_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_2795_to_fp16 = const()[name = string("op_2795_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> out_57_cast_fp16 = layer_norm(axes = out_57_axes_0, epsilon = var_2795_to_fp16, x = inputs_57_cast_fp16)[name = string("out_57_cast_fp16")];
+            tensor<fp16, [768]> obj_177_gamma_0_to_fp16 = const()[name = string("obj_177_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(129645696)))];
+            tensor<fp16, [768]> obj_177_beta_0_to_fp16 = const()[name = string("obj_177_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(129647296)))];
+            fp16 obj_177_epsilon_0_to_fp16 = const()[name = string("obj_177_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> obj_177_cast_fp16 = batch_norm(beta = obj_177_beta_0_to_fp16, epsilon = obj_177_epsilon_0_to_fp16, gamma = obj_177_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_57_cast_fp16)[name = string("obj_177_cast_fp16")];
+            string var_2815_pad_type_0 = const()[name = string("op_2815_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2815_strides_0 = const()[name = string("op_2815_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2815_pad_0 = const()[name = string("op_2815_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2815_dilations_0 = const()[name = string("op_2815_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2815_groups_0 = const()[name = string("op_2815_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_9_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(129648896))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(129943872))))[name = string("layers_9_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_9_encoder_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_9_encoder_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(129944000)))];
+            tensor<fp16, [1, 768, 1, 1]> var_2815_cast_fp16 = conv(bias = layers_9_encoder_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_2815_dilations_0, groups = var_2815_groups_0, pad = var_2815_pad_0, pad_type = var_2815_pad_type_0, strides = var_2815_strides_0, weight = layers_9_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_177_cast_fp16)[name = string("op_2815_cast_fp16")];
+            string var_2821_pad_type_0 = const()[name = string("op_2821_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2821_strides_0 = const()[name = string("op_2821_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2821_pad_0 = const()[name = string("op_2821_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2821_dilations_0 = const()[name = string("op_2821_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2821_groups_0 = const()[name = string("op_2821_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_9_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(129950784))), nonzero_data = tensor<fp16, [2545]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(129945600))))[name = string("layers_9_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1]> var_2821_cast_fp16 = conv(dilations = var_2821_dilations_0, groups = var_2821_groups_0, pad = var_2821_pad_0, pad_type = var_2821_pad_type_0, strides = var_2821_strides_0, weight = layers_9_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_177_cast_fp16)[name = string("op_2821_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> query_39_cast_fp16 = add(x = var_2815_cast_fp16, y = var_2821_cast_fp16)[name = string("query_39_cast_fp16")];
+            tensor<int32, [4]> var_2824 = const()[name = string("op_2824"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1]> mh_q_39_cast_fp16 = reshape(shape = var_2824, x = query_39_cast_fp16)[name = string("mh_q_39_cast_fp16")];
+            fp16 var_2826_to_fp16 = const()[name = string("op_2826_to_fp16"), val = fp16(0x1p-3)];
+            tensor<fp16, [1, 12, 64, 1]> var_2827_cast_fp16 = mul(x = mh_q_39_cast_fp16, y = var_2826_to_fp16)[name = string("op_2827_cast_fp16")];
+            tensor<int32, [4]> var_2828 = const()[name = string("op_2828"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1536]> var_2829_cast_fp16 = reshape(shape = var_2828, x = obj_179_cast_fp16)[name = string("op_2829_cast_fp16")];
+            bool mh_w_77_transpose_x_0 = const()[name = string("mh_w_77_transpose_x_0"), val = bool(true)];
+            bool mh_w_77_transpose_y_0 = const()[name = string("mh_w_77_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 12, 1, 1536]> mh_w_77_cast_fp16 = matmul(transpose_x = mh_w_77_transpose_x_0, transpose_y = mh_w_77_transpose_y_0, x = var_2827_cast_fp16, y = var_2829_cast_fp16)[name = string("mh_w_77_cast_fp16")];
+            tensor<fp16, [1, 12, 1, 1536]> mh_w_79_cast_fp16 = add(x = mh_w_77_cast_fp16, y = var_297_cast_fp16)[name = string("mh_w_79_cast_fp16")];
+            tensor<fp16, [1, 12, 1, 1536]> obj_185_cast_fp16 = softmax(axis = var_2651, x = mh_w_79_cast_fp16)[name = string("obj_185_cast_fp16")];
+            tensor<int32, [4]> var_2838 = const()[name = string("op_2838"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1536]> var_2839_cast_fp16 = reshape(shape = var_2838, x = obj_181_cast_fp16)[name = string("op_2839_cast_fp16")];
+            bool attn_39_transpose_x_0 = const()[name = string("attn_39_transpose_x_0"), val = bool(false)];
+            bool attn_39_transpose_y_0 = const()[name = string("attn_39_transpose_y_0"), val = bool(true)];
+            tensor<fp16, [1, 12, 64, 1]> attn_39_cast_fp16 = matmul(transpose_x = attn_39_transpose_x_0, transpose_y = attn_39_transpose_y_0, x = var_2839_cast_fp16, y = obj_185_cast_fp16)[name = string("attn_39_cast_fp16")];
+            tensor<int32, [4]> var_2842 = const()[name = string("op_2842"), val = tensor<int32, [4]>([1, 768, 1, -1])];
+            tensor<fp16, [1, 768, 1, 1]> input_93_cast_fp16 = reshape(shape = var_2842, x = attn_39_cast_fp16)[name = string("input_93_cast_fp16")];
+            string var_2852_pad_type_0 = const()[name = string("op_2852_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2852_strides_0 = const()[name = string("op_2852_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2852_pad_0 = const()[name = string("op_2852_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2852_dilations_0 = const()[name = string("op_2852_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2852_groups_0 = const()[name = string("op_2852_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_9_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(130024576))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(130319552))))[name = string("layers_9_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_9_encoder_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_9_encoder_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(130319680)))];
+            tensor<fp16, [1, 768, 1, 1]> var_2852_cast_fp16 = conv(bias = layers_9_encoder_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_2852_dilations_0, groups = var_2852_groups_0, pad = var_2852_pad_0, pad_type = var_2852_pad_type_0, strides = var_2852_strides_0, weight = layers_9_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_93_cast_fp16)[name = string("op_2852_cast_fp16")];
+            string var_2858_pad_type_0 = const()[name = string("op_2858_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2858_strides_0 = const()[name = string("op_2858_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2858_pad_0 = const()[name = string("op_2858_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2858_dilations_0 = const()[name = string("op_2858_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2858_groups_0 = const()[name = string("op_2858_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_9_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(130326784))), nonzero_data = tensor<fp16, [2714]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(130321280))))[name = string("layers_9_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1]> var_2858_cast_fp16 = conv(dilations = var_2858_dilations_0, groups = var_2858_groups_0, pad = var_2858_pad_0, pad_type = var_2858_pad_type_0, strides = var_2858_strides_0, weight = layers_9_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_93_cast_fp16)[name = string("op_2858_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> obj_183_cast_fp16 = add(x = var_2852_cast_fp16, y = var_2858_cast_fp16)[name = string("obj_183_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> inputs_59_cast_fp16 = add(x = inputs_57_cast_fp16, y = obj_183_cast_fp16)[name = string("inputs_59_cast_fp16")];
+            tensor<int32, [1]> out_59_axes_0 = const()[name = string("out_59_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_2872_to_fp16 = const()[name = string("op_2872_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> out_59_cast_fp16 = layer_norm(axes = out_59_axes_0, epsilon = var_2872_to_fp16, x = inputs_59_cast_fp16)[name = string("out_59_cast_fp16")];
+            tensor<fp16, [768]> input_95_gamma_0_to_fp16 = const()[name = string("input_95_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(130400576)))];
+            tensor<fp16, [768]> input_95_beta_0_to_fp16 = const()[name = string("input_95_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(130402176)))];
+            fp16 input_95_epsilon_0_to_fp16 = const()[name = string("input_95_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> input_95_cast_fp16 = batch_norm(beta = input_95_beta_0_to_fp16, epsilon = input_95_epsilon_0_to_fp16, gamma = input_95_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_59_cast_fp16)[name = string("input_95_cast_fp16")];
+            string var_2890_pad_type_0 = const()[name = string("op_2890_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2890_strides_0 = const()[name = string("op_2890_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2890_pad_0 = const()[name = string("op_2890_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2890_dilations_0 = const()[name = string("op_2890_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2890_groups_0 = const()[name = string("op_2890_groups_0"), val = int32(1)];
+            tensor<fp16, [3072, 768, 1, 1]> layers_9_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [3072, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(130403776))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(131583488))))[name = string("layers_9_fc1_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [3072]> layers_9_fc1_inlier_module_bias_to_fp16 = const()[name = string("layers_9_fc1_inlier_module_bias_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(131583616)))];
+            tensor<fp16, [1, 3072, 1, 1]> var_2890_cast_fp16 = conv(bias = layers_9_fc1_inlier_module_bias_to_fp16, dilations = var_2890_dilations_0, groups = var_2890_groups_0, pad = var_2890_pad_0, pad_type = var_2890_pad_type_0, strides = var_2890_strides_0, weight = layers_9_fc1_inlier_module_weight_to_fp16_palettized, x = input_95_cast_fp16)[name = string("op_2890_cast_fp16")];
+            string var_2896_pad_type_0 = const()[name = string("op_2896_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2896_strides_0 = const()[name = string("op_2896_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2896_pad_0 = const()[name = string("op_2896_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2896_dilations_0 = const()[name = string("op_2896_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2896_groups_0 = const()[name = string("op_2896_groups_0"), val = int32(1)];
+            tensor<fp16, [3072, 768, 1, 1]> layers_9_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [3072, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(131606976))), nonzero_data = tensor<fp16, [8519]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(131589824))))[name = string("layers_9_fc1_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 3072, 1, 1]> var_2896_cast_fp16 = conv(dilations = var_2896_dilations_0, groups = var_2896_groups_0, pad = var_2896_pad_0, pad_type = var_2896_pad_type_0, strides = var_2896_strides_0, weight = layers_9_fc1_outlier_module_weight_to_fp16_sparsified, x = input_95_cast_fp16)[name = string("op_2896_cast_fp16")];
+            tensor<fp16, [1, 3072, 1, 1]> input_97_cast_fp16 = add(x = var_2890_cast_fp16, y = var_2896_cast_fp16)[name = string("input_97_cast_fp16")];
+            string input_99_mode_0 = const()[name = string("input_99_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 3072, 1, 1]> input_99_cast_fp16 = gelu(mode = input_99_mode_0, x = input_97_cast_fp16)[name = string("input_99_cast_fp16")];
+            string var_2907_pad_type_0 = const()[name = string("op_2907_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2907_strides_0 = const()[name = string("op_2907_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2907_pad_0 = const()[name = string("op_2907_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2907_dilations_0 = const()[name = string("op_2907_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2907_groups_0 = const()[name = string("op_2907_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 3072, 1, 1]> layers_9_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(131901952))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(133081664))))[name = string("layers_9_fc2_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_9_fc2_inlier_module_bias_to_fp16 = const()[name = string("layers_9_fc2_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(133081792)))];
+            tensor<fp16, [1, 768, 1, 1]> var_2907_cast_fp16 = conv(bias = layers_9_fc2_inlier_module_bias_to_fp16, dilations = var_2907_dilations_0, groups = var_2907_groups_0, pad = var_2907_pad_0, pad_type = var_2907_pad_type_0, strides = var_2907_strides_0, weight = layers_9_fc2_inlier_module_weight_to_fp16_palettized, x = input_99_cast_fp16)[name = string("op_2907_cast_fp16")];
+            string var_2913_pad_type_0 = const()[name = string("op_2913_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2913_strides_0 = const()[name = string("op_2913_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2913_pad_0 = const()[name = string("op_2913_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2913_dilations_0 = const()[name = string("op_2913_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2913_groups_0 = const()[name = string("op_2913_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 3072, 1, 1]> layers_9_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(133107072))), nonzero_data = tensor<fp16, [11779]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(133083392))))[name = string("layers_9_fc2_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1]> var_2913_cast_fp16 = conv(dilations = var_2913_dilations_0, groups = var_2913_groups_0, pad = var_2913_pad_0, pad_type = var_2913_pad_type_0, strides = var_2913_strides_0, weight = layers_9_fc2_outlier_module_weight_to_fp16_sparsified, x = input_99_cast_fp16)[name = string("op_2913_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> hidden_states_21_cast_fp16 = add(x = var_2907_cast_fp16, y = var_2913_cast_fp16)[name = string("hidden_states_21_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> inputs_61_cast_fp16 = add(x = inputs_59_cast_fp16, y = hidden_states_21_cast_fp16)[name = string("inputs_61_cast_fp16")];
+            tensor<int32, [4]> obj_197_begin_0 = const()[name = string("obj_197_begin_0"), val = tensor<int32, [4]>([10, 0, 0, 0])];
+            tensor<int32, [4]> obj_197_end_0 = const()[name = string("obj_197_end_0"), val = tensor<int32, [4]>([11, 768, 1, 1536])];
+            tensor<bool, [4]> obj_197_end_mask_0 = const()[name = string("obj_197_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<fp16, [1, 768, 1, 1536]> obj_197_cast_fp16 = slice_by_index(begin = obj_197_begin_0, end = obj_197_end_0, end_mask = obj_197_end_mask_0, x = read_state_2)[name = string("obj_197_cast_fp16")];
+            tensor<int32, [4]> obj_199_begin_0 = const()[name = string("obj_199_begin_0"), val = tensor<int32, [4]>([10, 0, 0, 0])];
+            tensor<int32, [4]> obj_199_end_0 = const()[name = string("obj_199_end_0"), val = tensor<int32, [4]>([11, 768, 1, 1536])];
+            tensor<bool, [4]> obj_199_end_mask_0 = const()[name = string("obj_199_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<fp16, [1, 768, 1, 1536]> obj_199_cast_fp16 = slice_by_index(begin = obj_199_begin_0, end = obj_199_end_0, end_mask = obj_199_end_mask_0, x = read_state_3)[name = string("obj_199_cast_fp16")];
+            int32 var_2936 = const()[name = string("op_2936"), val = int32(3)];
+            tensor<int32, [1]> out_61_axes_0 = const()[name = string("out_61_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_2961_to_fp16 = const()[name = string("op_2961_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> out_61_cast_fp16 = layer_norm(axes = out_61_axes_0, epsilon = var_2961_to_fp16, x = inputs_61_cast_fp16)[name = string("out_61_cast_fp16")];
+            tensor<fp16, [768]> obj_187_gamma_0_to_fp16 = const()[name = string("obj_187_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(133402048)))];
+            tensor<fp16, [768]> obj_187_beta_0_to_fp16 = const()[name = string("obj_187_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(133403648)))];
+            fp16 obj_187_epsilon_0_to_fp16 = const()[name = string("obj_187_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> obj_187_cast_fp16 = batch_norm(beta = obj_187_beta_0_to_fp16, epsilon = obj_187_epsilon_0_to_fp16, gamma = obj_187_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_61_cast_fp16)[name = string("obj_187_cast_fp16")];
+            string var_2983_pad_type_0 = const()[name = string("op_2983_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2983_strides_0 = const()[name = string("op_2983_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2983_pad_0 = const()[name = string("op_2983_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2983_dilations_0 = const()[name = string("op_2983_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2983_groups_0 = const()[name = string("op_2983_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_10_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(133405248))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(133700224))))[name = string("layers_10_self_attn_q_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_10_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_10_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(133700352)))];
+            tensor<fp16, [1, 768, 1, 1]> var_2983_cast_fp16 = conv(bias = layers_10_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_2983_dilations_0, groups = var_2983_groups_0, pad = var_2983_pad_0, pad_type = var_2983_pad_type_0, strides = var_2983_strides_0, weight = layers_10_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_187_cast_fp16)[name = string("op_2983_cast_fp16")];
+            string var_2989_pad_type_0 = const()[name = string("op_2989_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2989_strides_0 = const()[name = string("op_2989_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2989_pad_0 = const()[name = string("op_2989_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2989_dilations_0 = const()[name = string("op_2989_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2989_groups_0 = const()[name = string("op_2989_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_10_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(133706944))), nonzero_data = tensor<fp16, [2435]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(133701952))))[name = string("layers_10_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1]> var_2989_cast_fp16 = conv(dilations = var_2989_dilations_0, groups = var_2989_groups_0, pad = var_2989_pad_0, pad_type = var_2989_pad_type_0, strides = var_2989_strides_0, weight = layers_10_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_187_cast_fp16)[name = string("op_2989_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> query_41_cast_fp16 = add(x = var_2983_cast_fp16, y = var_2989_cast_fp16)[name = string("query_41_cast_fp16")];
+            string var_2998_pad_type_0 = const()[name = string("op_2998_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_2998_strides_0 = const()[name = string("op_2998_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_2998_pad_0 = const()[name = string("op_2998_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_2998_dilations_0 = const()[name = string("op_2998_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_2998_groups_0 = const()[name = string("op_2998_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_10_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(133780736))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(134075712))))[name = string("layers_10_self_attn_k_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1, 768, 1, 1]> var_2998_cast_fp16 = conv(dilations = var_2998_dilations_0, groups = var_2998_groups_0, pad = var_2998_pad_0, pad_type = var_2998_pad_type_0, strides = var_2998_strides_0, weight = layers_10_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_187_cast_fp16)[name = string("op_2998_cast_fp16")];
+            string var_3004_pad_type_0 = const()[name = string("op_3004_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_3004_strides_0 = const()[name = string("op_3004_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3004_pad_0 = const()[name = string("op_3004_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3004_dilations_0 = const()[name = string("op_3004_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_3004_groups_0 = const()[name = string("op_3004_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_10_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(134081280))), nonzero_data = tensor<fp16, [2657]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(134075840))))[name = string("layers_10_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1]> var_3004_cast_fp16 = conv(dilations = var_3004_dilations_0, groups = var_3004_groups_0, pad = var_3004_pad_0, pad_type = var_3004_pad_type_0, strides = var_3004_strides_0, weight = layers_10_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_187_cast_fp16)[name = string("op_3004_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> current_key_21_cast_fp16 = add(x = var_2998_cast_fp16, y = var_3004_cast_fp16)[name = string("current_key_21_cast_fp16")];
+            string var_3014_pad_type_0 = const()[name = string("op_3014_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_3014_strides_0 = const()[name = string("op_3014_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3014_pad_0 = const()[name = string("op_3014_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3014_dilations_0 = const()[name = string("op_3014_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_3014_groups_0 = const()[name = string("op_3014_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_10_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(134155072))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(134450048))))[name = string("layers_10_self_attn_v_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_10_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_10_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(134450176)))];
+            tensor<fp16, [1, 768, 1, 1]> var_3014_cast_fp16 = conv(bias = layers_10_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_3014_dilations_0, groups = var_3014_groups_0, pad = var_3014_pad_0, pad_type = var_3014_pad_type_0, strides = var_3014_strides_0, weight = layers_10_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_187_cast_fp16)[name = string("op_3014_cast_fp16")];
+            string var_3020_pad_type_0 = const()[name = string("op_3020_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_3020_strides_0 = const()[name = string("op_3020_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3020_pad_0 = const()[name = string("op_3020_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3020_dilations_0 = const()[name = string("op_3020_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_3020_groups_0 = const()[name = string("op_3020_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_10_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(134458560))), nonzero_data = tensor<fp16, [3338]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(134451776))))[name = string("layers_10_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1]> var_3020_cast_fp16 = conv(dilations = var_3020_dilations_0, groups = var_3020_groups_0, pad = var_3020_pad_0, pad_type = var_3020_pad_type_0, strides = var_3020_strides_0, weight = layers_10_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_187_cast_fp16)[name = string("op_3020_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> current_value_21_cast_fp16 = add(x = var_3014_cast_fp16, y = var_3020_cast_fp16)[name = string("current_value_21_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 448]> var_3026_cast_fp16 = mul(x = current_key_21_cast_fp16, y = var_202_cast_fp16)[name = string("op_3026_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 448]> key_21_cast_fp16 = add(x = var_71_cast_fp16_10, y = var_3026_cast_fp16)[name = string("key_21_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 448]> var_3028_cast_fp16 = mul(x = current_value_21_cast_fp16, y = var_202_cast_fp16)[name = string("op_3028_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 448]> value_21_cast_fp16 = add(x = var_86_cast_fp16_10, y = var_3028_cast_fp16)[name = string("value_21_cast_fp16")];
+            tensor<int32, [4]> var_3031 = const()[name = string("op_3031"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1]> mh_q_41_cast_fp16 = reshape(shape = var_3031, x = query_41_cast_fp16)[name = string("mh_q_41_cast_fp16")];
+            fp16 var_3033_to_fp16 = const()[name = string("op_3033_to_fp16"), val = fp16(0x1p-3)];
+            tensor<fp16, [1, 12, 64, 1]> var_3034_cast_fp16 = mul(x = mh_q_41_cast_fp16, y = var_3033_to_fp16)[name = string("op_3034_cast_fp16")];
+            tensor<int32, [4]> var_3035 = const()[name = string("op_3035"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 448]> var_3036_cast_fp16 = reshape(shape = var_3035, x = key_21_cast_fp16)[name = string("op_3036_cast_fp16")];
+            bool mh_w_81_transpose_x_0 = const()[name = string("mh_w_81_transpose_x_0"), val = bool(true)];
+            bool mh_w_81_transpose_y_0 = const()[name = string("mh_w_81_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 12, 1, 448]> mh_w_81_cast_fp16 = matmul(transpose_x = mh_w_81_transpose_x_0, transpose_y = mh_w_81_transpose_y_0, x = var_3034_cast_fp16, y = var_3036_cast_fp16)[name = string("mh_w_81_cast_fp16")];
+            tensor<fp16, [1, 12, 1, 448]> mh_w_83_cast_fp16 = add(x = mh_w_81_cast_fp16, y = var_219_cast_fp16)[name = string("mh_w_83_cast_fp16")];
+            tensor<fp16, [1, 12, 1, 448]> var_3044_cast_fp16 = softmax(axis = var_2936, x = mh_w_83_cast_fp16)[name = string("op_3044_cast_fp16")];
+            tensor<int32, [4]> var_3045 = const()[name = string("op_3045"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 448]> var_3046_cast_fp16 = reshape(shape = var_3045, x = value_21_cast_fp16)[name = string("op_3046_cast_fp16")];
+            bool attn_41_transpose_x_0 = const()[name = string("attn_41_transpose_x_0"), val = bool(false)];
+            bool attn_41_transpose_y_0 = const()[name = string("attn_41_transpose_y_0"), val = bool(true)];
+            tensor<fp16, [1, 12, 64, 1]> attn_41_cast_fp16 = matmul(transpose_x = attn_41_transpose_x_0, transpose_y = attn_41_transpose_y_0, x = var_3046_cast_fp16, y = var_3044_cast_fp16)[name = string("attn_41_cast_fp16")];
+            tensor<int32, [4]> var_3049 = const()[name = string("op_3049"), val = tensor<int32, [4]>([1, 768, 1, -1])];
+            tensor<fp16, [1, 768, 1, 1]> input_101_cast_fp16 = reshape(shape = var_3049, x = attn_41_cast_fp16)[name = string("input_101_cast_fp16")];
+            string var_3059_pad_type_0 = const()[name = string("op_3059_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_3059_strides_0 = const()[name = string("op_3059_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3059_pad_0 = const()[name = string("op_3059_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3059_dilations_0 = const()[name = string("op_3059_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_3059_groups_0 = const()[name = string("op_3059_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_10_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(134532352))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(134827328))))[name = string("layers_10_self_attn_o_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_10_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_10_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(134827456)))];
+            tensor<fp16, [1, 768, 1, 1]> var_3059_cast_fp16 = conv(bias = layers_10_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_3059_dilations_0, groups = var_3059_groups_0, pad = var_3059_pad_0, pad_type = var_3059_pad_type_0, strides = var_3059_strides_0, weight = layers_10_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_101_cast_fp16)[name = string("op_3059_cast_fp16")];
+            string var_3065_pad_type_0 = const()[name = string("op_3065_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_3065_strides_0 = const()[name = string("op_3065_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3065_pad_0 = const()[name = string("op_3065_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3065_dilations_0 = const()[name = string("op_3065_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_3065_groups_0 = const()[name = string("op_3065_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_10_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(134835904))), nonzero_data = tensor<fp16, [3364]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(134829056))))[name = string("layers_10_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1]> var_3065_cast_fp16 = conv(dilations = var_3065_dilations_0, groups = var_3065_groups_0, pad = var_3065_pad_0, pad_type = var_3065_pad_type_0, strides = var_3065_strides_0, weight = layers_10_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_101_cast_fp16)[name = string("op_3065_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> obj_193_cast_fp16 = add(x = var_3059_cast_fp16, y = var_3065_cast_fp16)[name = string("obj_193_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> inputs_63_cast_fp16 = add(x = inputs_61_cast_fp16, y = obj_193_cast_fp16)[name = string("inputs_63_cast_fp16")];
+            tensor<int32, [1]> out_63_axes_0 = const()[name = string("out_63_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_3080_to_fp16 = const()[name = string("op_3080_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> out_63_cast_fp16 = layer_norm(axes = out_63_axes_0, epsilon = var_3080_to_fp16, x = inputs_63_cast_fp16)[name = string("out_63_cast_fp16")];
+            tensor<fp16, [768]> obj_195_gamma_0_to_fp16 = const()[name = string("obj_195_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(134909696)))];
+            tensor<fp16, [768]> obj_195_beta_0_to_fp16 = const()[name = string("obj_195_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(134911296)))];
+            fp16 obj_195_epsilon_0_to_fp16 = const()[name = string("obj_195_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> obj_195_cast_fp16 = batch_norm(beta = obj_195_beta_0_to_fp16, epsilon = obj_195_epsilon_0_to_fp16, gamma = obj_195_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_63_cast_fp16)[name = string("obj_195_cast_fp16")];
+            string var_3100_pad_type_0 = const()[name = string("op_3100_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_3100_strides_0 = const()[name = string("op_3100_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3100_pad_0 = const()[name = string("op_3100_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3100_dilations_0 = const()[name = string("op_3100_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_3100_groups_0 = const()[name = string("op_3100_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_10_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(134912896))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(135207872))))[name = string("layers_10_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_10_encoder_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_10_encoder_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(135208000)))];
+            tensor<fp16, [1, 768, 1, 1]> var_3100_cast_fp16 = conv(bias = layers_10_encoder_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_3100_dilations_0, groups = var_3100_groups_0, pad = var_3100_pad_0, pad_type = var_3100_pad_type_0, strides = var_3100_strides_0, weight = layers_10_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_195_cast_fp16)[name = string("op_3100_cast_fp16")];
+            string var_3106_pad_type_0 = const()[name = string("op_3106_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_3106_strides_0 = const()[name = string("op_3106_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3106_pad_0 = const()[name = string("op_3106_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3106_dilations_0 = const()[name = string("op_3106_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_3106_groups_0 = const()[name = string("op_3106_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_10_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(135214912))), nonzero_data = tensor<fp16, [2606]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(135209600))))[name = string("layers_10_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1]> var_3106_cast_fp16 = conv(dilations = var_3106_dilations_0, groups = var_3106_groups_0, pad = var_3106_pad_0, pad_type = var_3106_pad_type_0, strides = var_3106_strides_0, weight = layers_10_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_195_cast_fp16)[name = string("op_3106_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> query_43_cast_fp16 = add(x = var_3100_cast_fp16, y = var_3106_cast_fp16)[name = string("query_43_cast_fp16")];
+            tensor<int32, [4]> var_3109 = const()[name = string("op_3109"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1]> mh_q_43_cast_fp16 = reshape(shape = var_3109, x = query_43_cast_fp16)[name = string("mh_q_43_cast_fp16")];
+            fp16 var_3111_to_fp16 = const()[name = string("op_3111_to_fp16"), val = fp16(0x1p-3)];
+            tensor<fp16, [1, 12, 64, 1]> var_3112_cast_fp16 = mul(x = mh_q_43_cast_fp16, y = var_3111_to_fp16)[name = string("op_3112_cast_fp16")];
+            tensor<int32, [4]> var_3113 = const()[name = string("op_3113"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1536]> var_3114_cast_fp16 = reshape(shape = var_3113, x = obj_197_cast_fp16)[name = string("op_3114_cast_fp16")];
+            bool mh_w_85_transpose_x_0 = const()[name = string("mh_w_85_transpose_x_0"), val = bool(true)];
+            bool mh_w_85_transpose_y_0 = const()[name = string("mh_w_85_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 12, 1, 1536]> mh_w_85_cast_fp16 = matmul(transpose_x = mh_w_85_transpose_x_0, transpose_y = mh_w_85_transpose_y_0, x = var_3112_cast_fp16, y = var_3114_cast_fp16)[name = string("mh_w_85_cast_fp16")];
+            tensor<fp16, [1, 12, 1, 1536]> mh_w_87_cast_fp16 = add(x = mh_w_85_cast_fp16, y = var_297_cast_fp16)[name = string("mh_w_87_cast_fp16")];
+            tensor<fp16, [1, 12, 1, 1536]> obj_203_cast_fp16 = softmax(axis = var_2936, x = mh_w_87_cast_fp16)[name = string("obj_203_cast_fp16")];
+            tensor<int32, [4]> var_3123 = const()[name = string("op_3123"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1536]> var_3124_cast_fp16 = reshape(shape = var_3123, x = obj_199_cast_fp16)[name = string("op_3124_cast_fp16")];
+            bool attn_43_transpose_x_0 = const()[name = string("attn_43_transpose_x_0"), val = bool(false)];
+            bool attn_43_transpose_y_0 = const()[name = string("attn_43_transpose_y_0"), val = bool(true)];
+            tensor<fp16, [1, 12, 64, 1]> attn_43_cast_fp16 = matmul(transpose_x = attn_43_transpose_x_0, transpose_y = attn_43_transpose_y_0, x = var_3124_cast_fp16, y = obj_203_cast_fp16)[name = string("attn_43_cast_fp16")];
+            tensor<int32, [4]> var_3127 = const()[name = string("op_3127"), val = tensor<int32, [4]>([1, 768, 1, -1])];
+            tensor<fp16, [1, 768, 1, 1]> input_103_cast_fp16 = reshape(shape = var_3127, x = attn_43_cast_fp16)[name = string("input_103_cast_fp16")];
+            string var_3137_pad_type_0 = const()[name = string("op_3137_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_3137_strides_0 = const()[name = string("op_3137_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3137_pad_0 = const()[name = string("op_3137_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3137_dilations_0 = const()[name = string("op_3137_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_3137_groups_0 = const()[name = string("op_3137_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_10_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(135288704))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(135583680))))[name = string("layers_10_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_10_encoder_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_10_encoder_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(135583808)))];
+            tensor<fp16, [1, 768, 1, 1]> var_3137_cast_fp16 = conv(bias = layers_10_encoder_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_3137_dilations_0, groups = var_3137_groups_0, pad = var_3137_pad_0, pad_type = var_3137_pad_type_0, strides = var_3137_strides_0, weight = layers_10_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_103_cast_fp16)[name = string("op_3137_cast_fp16")];
+            string var_3143_pad_type_0 = const()[name = string("op_3143_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_3143_strides_0 = const()[name = string("op_3143_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3143_pad_0 = const()[name = string("op_3143_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3143_dilations_0 = const()[name = string("op_3143_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_3143_groups_0 = const()[name = string("op_3143_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_10_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(135592064))), nonzero_data = tensor<fp16, [3295]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(135585408))))[name = string("layers_10_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1]> var_3143_cast_fp16 = conv(dilations = var_3143_dilations_0, groups = var_3143_groups_0, pad = var_3143_pad_0, pad_type = var_3143_pad_type_0, strides = var_3143_strides_0, weight = layers_10_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_103_cast_fp16)[name = string("op_3143_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> obj_201_cast_fp16 = add(x = var_3137_cast_fp16, y = var_3143_cast_fp16)[name = string("obj_201_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> inputs_65_cast_fp16 = add(x = inputs_63_cast_fp16, y = obj_201_cast_fp16)[name = string("inputs_65_cast_fp16")];
+            tensor<int32, [1]> out_65_axes_0 = const()[name = string("out_65_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_3157_to_fp16 = const()[name = string("op_3157_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> out_65_cast_fp16 = layer_norm(axes = out_65_axes_0, epsilon = var_3157_to_fp16, x = inputs_65_cast_fp16)[name = string("out_65_cast_fp16")];
+            tensor<fp16, [768]> input_105_gamma_0_to_fp16 = const()[name = string("input_105_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(135665856)))];
+            tensor<fp16, [768]> input_105_beta_0_to_fp16 = const()[name = string("input_105_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(135667456)))];
+            fp16 input_105_epsilon_0_to_fp16 = const()[name = string("input_105_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> input_105_cast_fp16 = batch_norm(beta = input_105_beta_0_to_fp16, epsilon = input_105_epsilon_0_to_fp16, gamma = input_105_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_65_cast_fp16)[name = string("input_105_cast_fp16")];
+            string var_3175_pad_type_0 = const()[name = string("op_3175_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_3175_strides_0 = const()[name = string("op_3175_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3175_pad_0 = const()[name = string("op_3175_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3175_dilations_0 = const()[name = string("op_3175_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_3175_groups_0 = const()[name = string("op_3175_groups_0"), val = int32(1)];
+            tensor<fp16, [3072, 768, 1, 1]> layers_10_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [3072, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(135669056))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(136848768))))[name = string("layers_10_fc1_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [3072]> layers_10_fc1_inlier_module_bias_to_fp16 = const()[name = string("layers_10_fc1_inlier_module_bias_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(136848896)))];
+            tensor<fp16, [1, 3072, 1, 1]> var_3175_cast_fp16 = conv(bias = layers_10_fc1_inlier_module_bias_to_fp16, dilations = var_3175_dilations_0, groups = var_3175_groups_0, pad = var_3175_pad_0, pad_type = var_3175_pad_type_0, strides = var_3175_strides_0, weight = layers_10_fc1_inlier_module_weight_to_fp16_palettized, x = input_105_cast_fp16)[name = string("op_3175_cast_fp16")];
+            string var_3181_pad_type_0 = const()[name = string("op_3181_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_3181_strides_0 = const()[name = string("op_3181_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3181_pad_0 = const()[name = string("op_3181_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3181_dilations_0 = const()[name = string("op_3181_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_3181_groups_0 = const()[name = string("op_3181_groups_0"), val = int32(1)];
+            tensor<fp16, [3072, 768, 1, 1]> layers_10_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [3072, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(136876736))), nonzero_data = tensor<fp16, [10762]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(136855104))))[name = string("layers_10_fc1_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 3072, 1, 1]> var_3181_cast_fp16 = conv(dilations = var_3181_dilations_0, groups = var_3181_groups_0, pad = var_3181_pad_0, pad_type = var_3181_pad_type_0, strides = var_3181_strides_0, weight = layers_10_fc1_outlier_module_weight_to_fp16_sparsified, x = input_105_cast_fp16)[name = string("op_3181_cast_fp16")];
+            tensor<fp16, [1, 3072, 1, 1]> input_107_cast_fp16 = add(x = var_3175_cast_fp16, y = var_3181_cast_fp16)[name = string("input_107_cast_fp16")];
+            string input_109_mode_0 = const()[name = string("input_109_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 3072, 1, 1]> input_109_cast_fp16 = gelu(mode = input_109_mode_0, x = input_107_cast_fp16)[name = string("input_109_cast_fp16")];
+            string var_3192_pad_type_0 = const()[name = string("op_3192_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_3192_strides_0 = const()[name = string("op_3192_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3192_pad_0 = const()[name = string("op_3192_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3192_dilations_0 = const()[name = string("op_3192_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_3192_groups_0 = const()[name = string("op_3192_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 3072, 1, 1]> layers_10_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(137171712))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(138351424))))[name = string("layers_10_fc2_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_10_fc2_inlier_module_bias_to_fp16 = const()[name = string("layers_10_fc2_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(138351552)))];
+            tensor<fp16, [1, 768, 1, 1]> var_3192_cast_fp16 = conv(bias = layers_10_fc2_inlier_module_bias_to_fp16, dilations = var_3192_dilations_0, groups = var_3192_groups_0, pad = var_3192_pad_0, pad_type = var_3192_pad_type_0, strides = var_3192_strides_0, weight = layers_10_fc2_inlier_module_weight_to_fp16_palettized, x = input_109_cast_fp16)[name = string("op_3192_cast_fp16")];
+            string var_3198_pad_type_0 = const()[name = string("op_3198_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_3198_strides_0 = const()[name = string("op_3198_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3198_pad_0 = const()[name = string("op_3198_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3198_dilations_0 = const()[name = string("op_3198_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_3198_groups_0 = const()[name = string("op_3198_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 3072, 1, 1]> layers_10_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(138378752))), nonzero_data = tensor<fp16, [12739]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(138353152))))[name = string("layers_10_fc2_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1]> var_3198_cast_fp16 = conv(dilations = var_3198_dilations_0, groups = var_3198_groups_0, pad = var_3198_pad_0, pad_type = var_3198_pad_type_0, strides = var_3198_strides_0, weight = layers_10_fc2_outlier_module_weight_to_fp16_sparsified, x = input_109_cast_fp16)[name = string("op_3198_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> hidden_states_23_cast_fp16 = add(x = var_3192_cast_fp16, y = var_3198_cast_fp16)[name = string("hidden_states_23_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> inputs_67_cast_fp16 = add(x = inputs_65_cast_fp16, y = hidden_states_23_cast_fp16)[name = string("inputs_67_cast_fp16")];
+            tensor<int32, [4]> obj_215_begin_0 = const()[name = string("obj_215_begin_0"), val = tensor<int32, [4]>([11, 0, 0, 0])];
+            tensor<int32, [4]> obj_215_end_0 = const()[name = string("obj_215_end_0"), val = tensor<int32, [4]>([12, 768, 1, 1536])];
+            tensor<bool, [4]> obj_215_end_mask_0 = const()[name = string("obj_215_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<fp16, [1, 768, 1, 1536]> obj_215_cast_fp16 = slice_by_index(begin = obj_215_begin_0, end = obj_215_end_0, end_mask = obj_215_end_mask_0, x = read_state_2)[name = string("obj_215_cast_fp16")];
+            tensor<int32, [4]> obj_217_begin_0 = const()[name = string("obj_217_begin_0"), val = tensor<int32, [4]>([11, 0, 0, 0])];
+            tensor<int32, [4]> obj_217_end_0 = const()[name = string("obj_217_end_0"), val = tensor<int32, [4]>([12, 768, 1, 1536])];
+            tensor<bool, [4]> obj_217_end_mask_0 = const()[name = string("obj_217_end_mask_0"), val = tensor<bool, [4]>([false, true, true, true])];
+            tensor<fp16, [1, 768, 1, 1536]> obj_217_cast_fp16 = slice_by_index(begin = obj_217_begin_0, end = obj_217_end_0, end_mask = obj_217_end_mask_0, x = read_state_3)[name = string("obj_217_cast_fp16")];
+            int32 var_3221 = const()[name = string("op_3221"), val = int32(3)];
+            tensor<int32, [1]> out_67_axes_0 = const()[name = string("out_67_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_3246_to_fp16 = const()[name = string("op_3246_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> out_67_cast_fp16 = layer_norm(axes = out_67_axes_0, epsilon = var_3246_to_fp16, x = inputs_67_cast_fp16)[name = string("out_67_cast_fp16")];
+            tensor<fp16, [768]> obj_205_gamma_0_to_fp16 = const()[name = string("obj_205_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(138673728)))];
+            tensor<fp16, [768]> obj_205_beta_0_to_fp16 = const()[name = string("obj_205_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(138675328)))];
+            fp16 obj_205_epsilon_0_to_fp16 = const()[name = string("obj_205_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> obj_205_cast_fp16 = batch_norm(beta = obj_205_beta_0_to_fp16, epsilon = obj_205_epsilon_0_to_fp16, gamma = obj_205_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_67_cast_fp16)[name = string("obj_205_cast_fp16")];
+            string var_3268_pad_type_0 = const()[name = string("op_3268_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_3268_strides_0 = const()[name = string("op_3268_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3268_pad_0 = const()[name = string("op_3268_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3268_dilations_0 = const()[name = string("op_3268_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_3268_groups_0 = const()[name = string("op_3268_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_11_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(138676928))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(138971904))))[name = string("layers_11_self_attn_q_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_11_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_11_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(138972032)))];
+            tensor<fp16, [1, 768, 1, 1]> var_3268_cast_fp16 = conv(bias = layers_11_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_3268_dilations_0, groups = var_3268_groups_0, pad = var_3268_pad_0, pad_type = var_3268_pad_type_0, strides = var_3268_strides_0, weight = layers_11_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_205_cast_fp16)[name = string("op_3268_cast_fp16")];
+            string var_3274_pad_type_0 = const()[name = string("op_3274_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_3274_strides_0 = const()[name = string("op_3274_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3274_pad_0 = const()[name = string("op_3274_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3274_dilations_0 = const()[name = string("op_3274_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_3274_groups_0 = const()[name = string("op_3274_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_11_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(138978816))), nonzero_data = tensor<fp16, [2538]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(138973632))))[name = string("layers_11_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1]> var_3274_cast_fp16 = conv(dilations = var_3274_dilations_0, groups = var_3274_groups_0, pad = var_3274_pad_0, pad_type = var_3274_pad_type_0, strides = var_3274_strides_0, weight = layers_11_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_205_cast_fp16)[name = string("op_3274_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> query_45_cast_fp16 = add(x = var_3268_cast_fp16, y = var_3274_cast_fp16)[name = string("query_45_cast_fp16")];
+            string var_3283_pad_type_0 = const()[name = string("op_3283_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_3283_strides_0 = const()[name = string("op_3283_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3283_pad_0 = const()[name = string("op_3283_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3283_dilations_0 = const()[name = string("op_3283_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_3283_groups_0 = const()[name = string("op_3283_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_11_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(139052608))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(139347584))))[name = string("layers_11_self_attn_k_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [1, 768, 1, 1]> var_3283_cast_fp16 = conv(dilations = var_3283_dilations_0, groups = var_3283_groups_0, pad = var_3283_pad_0, pad_type = var_3283_pad_type_0, strides = var_3283_strides_0, weight = layers_11_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_205_cast_fp16)[name = string("op_3283_cast_fp16")];
+            string var_3289_pad_type_0 = const()[name = string("op_3289_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_3289_strides_0 = const()[name = string("op_3289_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3289_pad_0 = const()[name = string("op_3289_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3289_dilations_0 = const()[name = string("op_3289_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_3289_groups_0 = const()[name = string("op_3289_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_11_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(139352704))), nonzero_data = tensor<fp16, [2436]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(139347712))))[name = string("layers_11_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1]> var_3289_cast_fp16 = conv(dilations = var_3289_dilations_0, groups = var_3289_groups_0, pad = var_3289_pad_0, pad_type = var_3289_pad_type_0, strides = var_3289_strides_0, weight = layers_11_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_205_cast_fp16)[name = string("op_3289_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> current_key_cast_fp16 = add(x = var_3283_cast_fp16, y = var_3289_cast_fp16)[name = string("current_key_cast_fp16")];
+            string var_3299_pad_type_0 = const()[name = string("op_3299_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_3299_strides_0 = const()[name = string("op_3299_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3299_pad_0 = const()[name = string("op_3299_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3299_dilations_0 = const()[name = string("op_3299_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_3299_groups_0 = const()[name = string("op_3299_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_11_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(139426496))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(139721472))))[name = string("layers_11_self_attn_v_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_11_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_11_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(139721600)))];
+            tensor<fp16, [1, 768, 1, 1]> var_3299_cast_fp16 = conv(bias = layers_11_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_3299_dilations_0, groups = var_3299_groups_0, pad = var_3299_pad_0, pad_type = var_3299_pad_type_0, strides = var_3299_strides_0, weight = layers_11_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_205_cast_fp16)[name = string("op_3299_cast_fp16")];
+            string var_3305_pad_type_0 = const()[name = string("op_3305_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_3305_strides_0 = const()[name = string("op_3305_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3305_pad_0 = const()[name = string("op_3305_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3305_dilations_0 = const()[name = string("op_3305_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_3305_groups_0 = const()[name = string("op_3305_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_11_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(139729792))), nonzero_data = tensor<fp16, [3238]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(139723200))))[name = string("layers_11_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1]> var_3305_cast_fp16 = conv(dilations = var_3305_dilations_0, groups = var_3305_groups_0, pad = var_3305_pad_0, pad_type = var_3305_pad_type_0, strides = var_3305_strides_0, weight = layers_11_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_205_cast_fp16)[name = string("op_3305_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> current_value_cast_fp16 = add(x = var_3299_cast_fp16, y = var_3305_cast_fp16)[name = string("current_value_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 448]> var_3311_cast_fp16 = mul(x = current_key_cast_fp16, y = var_202_cast_fp16)[name = string("op_3311_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 448]> key_cast_fp16 = add(x = var_71_cast_fp16_11, y = var_3311_cast_fp16)[name = string("key_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 448]> var_3313_cast_fp16 = mul(x = current_value_cast_fp16, y = var_202_cast_fp16)[name = string("op_3313_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 448]> value_cast_fp16 = add(x = var_86_cast_fp16_11, y = var_3313_cast_fp16)[name = string("value_cast_fp16")];
+            tensor<int32, [4]> var_3316 = const()[name = string("op_3316"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1]> mh_q_45_cast_fp16 = reshape(shape = var_3316, x = query_45_cast_fp16)[name = string("mh_q_45_cast_fp16")];
+            fp16 var_3318_to_fp16 = const()[name = string("op_3318_to_fp16"), val = fp16(0x1p-3)];
+            tensor<fp16, [1, 12, 64, 1]> var_3319_cast_fp16 = mul(x = mh_q_45_cast_fp16, y = var_3318_to_fp16)[name = string("op_3319_cast_fp16")];
+            tensor<int32, [4]> var_3320 = const()[name = string("op_3320"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 448]> var_3321_cast_fp16 = reshape(shape = var_3320, x = key_cast_fp16)[name = string("op_3321_cast_fp16")];
+            bool mh_w_89_transpose_x_0 = const()[name = string("mh_w_89_transpose_x_0"), val = bool(true)];
+            bool mh_w_89_transpose_y_0 = const()[name = string("mh_w_89_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 12, 1, 448]> mh_w_89_cast_fp16 = matmul(transpose_x = mh_w_89_transpose_x_0, transpose_y = mh_w_89_transpose_y_0, x = var_3319_cast_fp16, y = var_3321_cast_fp16)[name = string("mh_w_89_cast_fp16")];
+            tensor<fp16, [1, 12, 1, 448]> mh_w_91_cast_fp16 = add(x = mh_w_89_cast_fp16, y = var_219_cast_fp16)[name = string("mh_w_91_cast_fp16")];
+            tensor<fp16, [1, 12, 1, 448]> var_3329_cast_fp16 = softmax(axis = var_3221, x = mh_w_91_cast_fp16)[name = string("op_3329_cast_fp16")];
+            tensor<int32, [4]> var_3330 = const()[name = string("op_3330"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 448]> var_3331_cast_fp16 = reshape(shape = var_3330, x = value_cast_fp16)[name = string("op_3331_cast_fp16")];
+            bool attn_45_transpose_x_0 = const()[name = string("attn_45_transpose_x_0"), val = bool(false)];
+            bool attn_45_transpose_y_0 = const()[name = string("attn_45_transpose_y_0"), val = bool(true)];
+            tensor<fp16, [1, 12, 64, 1]> attn_45_cast_fp16 = matmul(transpose_x = attn_45_transpose_x_0, transpose_y = attn_45_transpose_y_0, x = var_3331_cast_fp16, y = var_3329_cast_fp16)[name = string("attn_45_cast_fp16")];
+            tensor<int32, [4]> var_3334 = const()[name = string("op_3334"), val = tensor<int32, [4]>([1, 768, 1, -1])];
+            tensor<fp16, [1, 768, 1, 1]> input_111_cast_fp16 = reshape(shape = var_3334, x = attn_45_cast_fp16)[name = string("input_111_cast_fp16")];
+            string var_3344_pad_type_0 = const()[name = string("op_3344_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_3344_strides_0 = const()[name = string("op_3344_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3344_pad_0 = const()[name = string("op_3344_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3344_dilations_0 = const()[name = string("op_3344_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_3344_groups_0 = const()[name = string("op_3344_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_11_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(139803584))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(140098560))))[name = string("layers_11_self_attn_o_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_11_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_11_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(140098688)))];
+            tensor<fp16, [1, 768, 1, 1]> var_3344_cast_fp16 = conv(bias = layers_11_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_3344_dilations_0, groups = var_3344_groups_0, pad = var_3344_pad_0, pad_type = var_3344_pad_type_0, strides = var_3344_strides_0, weight = layers_11_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_111_cast_fp16)[name = string("op_3344_cast_fp16")];
+            string var_3350_pad_type_0 = const()[name = string("op_3350_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_3350_strides_0 = const()[name = string("op_3350_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3350_pad_0 = const()[name = string("op_3350_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3350_dilations_0 = const()[name = string("op_3350_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_3350_groups_0 = const()[name = string("op_3350_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_11_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(140109696))), nonzero_data = tensor<fp16, [4653]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(140100288))))[name = string("layers_11_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1]> var_3350_cast_fp16 = conv(dilations = var_3350_dilations_0, groups = var_3350_groups_0, pad = var_3350_pad_0, pad_type = var_3350_pad_type_0, strides = var_3350_strides_0, weight = layers_11_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_111_cast_fp16)[name = string("op_3350_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> obj_211_cast_fp16 = add(x = var_3344_cast_fp16, y = var_3350_cast_fp16)[name = string("obj_211_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> inputs_69_cast_fp16 = add(x = inputs_67_cast_fp16, y = obj_211_cast_fp16)[name = string("inputs_69_cast_fp16")];
+            tensor<int32, [1]> out_69_axes_0 = const()[name = string("out_69_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_3365_to_fp16 = const()[name = string("op_3365_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> out_69_cast_fp16 = layer_norm(axes = out_69_axes_0, epsilon = var_3365_to_fp16, x = inputs_69_cast_fp16)[name = string("out_69_cast_fp16")];
+            tensor<fp16, [768]> obj_213_gamma_0_to_fp16 = const()[name = string("obj_213_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(140183488)))];
+            tensor<fp16, [768]> obj_213_beta_0_to_fp16 = const()[name = string("obj_213_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(140185088)))];
+            fp16 obj_213_epsilon_0_to_fp16 = const()[name = string("obj_213_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> obj_213_cast_fp16 = batch_norm(beta = obj_213_beta_0_to_fp16, epsilon = obj_213_epsilon_0_to_fp16, gamma = obj_213_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_69_cast_fp16)[name = string("obj_213_cast_fp16")];
+            string var_3385_pad_type_0 = const()[name = string("op_3385_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_3385_strides_0 = const()[name = string("op_3385_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3385_pad_0 = const()[name = string("op_3385_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3385_dilations_0 = const()[name = string("op_3385_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_3385_groups_0 = const()[name = string("op_3385_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_11_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(140186688))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(140481664))))[name = string("layers_11_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_11_encoder_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_11_encoder_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(140481792)))];
+            tensor<fp16, [1, 768, 1, 1]> var_3385_cast_fp16 = conv(bias = layers_11_encoder_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_3385_dilations_0, groups = var_3385_groups_0, pad = var_3385_pad_0, pad_type = var_3385_pad_type_0, strides = var_3385_strides_0, weight = layers_11_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_213_cast_fp16)[name = string("op_3385_cast_fp16")];
+            string var_3391_pad_type_0 = const()[name = string("op_3391_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_3391_strides_0 = const()[name = string("op_3391_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3391_pad_0 = const()[name = string("op_3391_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3391_dilations_0 = const()[name = string("op_3391_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_3391_groups_0 = const()[name = string("op_3391_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_11_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(140489728))), nonzero_data = tensor<fp16, [3114]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(140483392))))[name = string("layers_11_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1]> var_3391_cast_fp16 = conv(dilations = var_3391_dilations_0, groups = var_3391_groups_0, pad = var_3391_pad_0, pad_type = var_3391_pad_type_0, strides = var_3391_strides_0, weight = layers_11_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_213_cast_fp16)[name = string("op_3391_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> query_cast_fp16 = add(x = var_3385_cast_fp16, y = var_3391_cast_fp16)[name = string("query_cast_fp16")];
+            tensor<int32, [4]> var_3394 = const()[name = string("op_3394"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1]> mh_q_cast_fp16 = reshape(shape = var_3394, x = query_cast_fp16)[name = string("mh_q_cast_fp16")];
+            fp16 var_3396_to_fp16 = const()[name = string("op_3396_to_fp16"), val = fp16(0x1p-3)];
+            tensor<fp16, [1, 12, 64, 1]> var_3397_cast_fp16 = mul(x = mh_q_cast_fp16, y = var_3396_to_fp16)[name = string("op_3397_cast_fp16")];
+            tensor<int32, [4]> var_3398 = const()[name = string("op_3398"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1536]> var_3399_cast_fp16 = reshape(shape = var_3398, x = obj_215_cast_fp16)[name = string("op_3399_cast_fp16")];
+            bool mh_w_93_transpose_x_0 = const()[name = string("mh_w_93_transpose_x_0"), val = bool(true)];
+            bool mh_w_93_transpose_y_0 = const()[name = string("mh_w_93_transpose_y_0"), val = bool(false)];
+            tensor<fp16, [1, 12, 1, 1536]> mh_w_93_cast_fp16 = matmul(transpose_x = mh_w_93_transpose_x_0, transpose_y = mh_w_93_transpose_y_0, x = var_3397_cast_fp16, y = var_3399_cast_fp16)[name = string("mh_w_93_cast_fp16")];
+            tensor<fp16, [1, 12, 1, 1536]> mh_w_cast_fp16 = add(x = mh_w_93_cast_fp16, y = var_297_cast_fp16)[name = string("mh_w_cast_fp16")];
+            tensor<fp16, [1, 12, 1, 1536]> obj_221_cast_fp16 = softmax(axis = var_3221, x = mh_w_cast_fp16)[name = string("obj_221_cast_fp16")];
+            tensor<int32, [4]> var_3408 = const()[name = string("op_3408"), val = tensor<int32, [4]>([1, 12, 64, -1])];
+            tensor<fp16, [1, 12, 64, 1536]> var_3409_cast_fp16 = reshape(shape = var_3408, x = obj_217_cast_fp16)[name = string("op_3409_cast_fp16")];
+            bool attn_transpose_x_0 = const()[name = string("attn_transpose_x_0"), val = bool(false)];
+            bool attn_transpose_y_0 = const()[name = string("attn_transpose_y_0"), val = bool(true)];
+            tensor<fp16, [1, 12, 64, 1]> attn_cast_fp16 = matmul(transpose_x = attn_transpose_x_0, transpose_y = attn_transpose_y_0, x = var_3409_cast_fp16, y = obj_221_cast_fp16)[name = string("attn_cast_fp16")];
+            tensor<int32, [4]> var_3412 = const()[name = string("op_3412"), val = tensor<int32, [4]>([1, 768, 1, -1])];
+            tensor<fp16, [1, 768, 1, 1]> input_113_cast_fp16 = reshape(shape = var_3412, x = attn_cast_fp16)[name = string("input_113_cast_fp16")];
+            string var_3422_pad_type_0 = const()[name = string("op_3422_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_3422_strides_0 = const()[name = string("op_3422_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3422_pad_0 = const()[name = string("op_3422_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3422_dilations_0 = const()[name = string("op_3422_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_3422_groups_0 = const()[name = string("op_3422_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_11_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(140563520))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(140858496))))[name = string("layers_11_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_11_encoder_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = string("layers_11_encoder_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(140858624)))];
+            tensor<fp16, [1, 768, 1, 1]> var_3422_cast_fp16 = conv(bias = layers_11_encoder_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_3422_dilations_0, groups = var_3422_groups_0, pad = var_3422_pad_0, pad_type = var_3422_pad_type_0, strides = var_3422_strides_0, weight = layers_11_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_113_cast_fp16)[name = string("op_3422_cast_fp16")];
+            string var_3428_pad_type_0 = const()[name = string("op_3428_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_3428_strides_0 = const()[name = string("op_3428_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3428_pad_0 = const()[name = string("op_3428_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3428_dilations_0 = const()[name = string("op_3428_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_3428_groups_0 = const()[name = string("op_3428_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 768, 1, 1]> layers_11_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(140871296))), nonzero_data = tensor<fp16, [5495]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(140860224))))[name = string("layers_11_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1]> var_3428_cast_fp16 = conv(dilations = var_3428_dilations_0, groups = var_3428_groups_0, pad = var_3428_pad_0, pad_type = var_3428_pad_type_0, strides = var_3428_strides_0, weight = layers_11_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_113_cast_fp16)[name = string("op_3428_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> obj_219_cast_fp16 = add(x = var_3422_cast_fp16, y = var_3428_cast_fp16)[name = string("obj_219_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> inputs_71_cast_fp16 = add(x = inputs_69_cast_fp16, y = obj_219_cast_fp16)[name = string("inputs_71_cast_fp16")];
+            tensor<int32, [1]> out_71_axes_0 = const()[name = string("out_71_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_3439_to_fp16 = const()[name = string("op_3439_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> out_71_cast_fp16 = layer_norm(axes = out_71_axes_0, epsilon = var_3439_to_fp16, x = inputs_71_cast_fp16)[name = string("out_71_cast_fp16")];
+            tensor<fp16, [768]> input_115_gamma_0_to_fp16 = const()[name = string("input_115_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(140945088)))];
+            tensor<fp16, [768]> input_115_beta_0_to_fp16 = const()[name = string("input_115_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(140946688)))];
+            fp16 input_115_epsilon_0_to_fp16 = const()[name = string("input_115_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> input_115_cast_fp16 = batch_norm(beta = input_115_beta_0_to_fp16, epsilon = input_115_epsilon_0_to_fp16, gamma = input_115_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_71_cast_fp16)[name = string("input_115_cast_fp16")];
+            string var_3457_pad_type_0 = const()[name = string("op_3457_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_3457_strides_0 = const()[name = string("op_3457_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3457_pad_0 = const()[name = string("op_3457_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3457_dilations_0 = const()[name = string("op_3457_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_3457_groups_0 = const()[name = string("op_3457_groups_0"), val = int32(1)];
+            tensor<fp16, [3072, 768, 1, 1]> layers_11_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [3072, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(140948288))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(142128000))))[name = string("layers_11_fc1_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [3072]> layers_11_fc1_inlier_module_bias_to_fp16 = const()[name = string("layers_11_fc1_inlier_module_bias_to_fp16"), val = tensor<fp16, [3072]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(142128128)))];
+            tensor<fp16, [1, 3072, 1, 1]> var_3457_cast_fp16 = conv(bias = layers_11_fc1_inlier_module_bias_to_fp16, dilations = var_3457_dilations_0, groups = var_3457_groups_0, pad = var_3457_pad_0, pad_type = var_3457_pad_type_0, strides = var_3457_strides_0, weight = layers_11_fc1_inlier_module_weight_to_fp16_palettized, x = input_115_cast_fp16)[name = string("op_3457_cast_fp16")];
+            string var_3463_pad_type_0 = const()[name = string("op_3463_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_3463_strides_0 = const()[name = string("op_3463_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3463_pad_0 = const()[name = string("op_3463_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3463_dilations_0 = const()[name = string("op_3463_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_3463_groups_0 = const()[name = string("op_3463_groups_0"), val = int32(1)];
+            tensor<fp16, [3072, 768, 1, 1]> layers_11_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [3072, 768, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(142163008))), nonzero_data = tensor<fp16, [14304]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(142134336))))[name = string("layers_11_fc1_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 3072, 1, 1]> var_3463_cast_fp16 = conv(dilations = var_3463_dilations_0, groups = var_3463_groups_0, pad = var_3463_pad_0, pad_type = var_3463_pad_type_0, strides = var_3463_strides_0, weight = layers_11_fc1_outlier_module_weight_to_fp16_sparsified, x = input_115_cast_fp16)[name = string("op_3463_cast_fp16")];
+            tensor<fp16, [1, 3072, 1, 1]> input_117_cast_fp16 = add(x = var_3457_cast_fp16, y = var_3463_cast_fp16)[name = string("input_117_cast_fp16")];
+            string input_mode_0 = const()[name = string("input_mode_0"), val = string("EXACT")];
+            tensor<fp16, [1, 3072, 1, 1]> input_cast_fp16 = gelu(mode = input_mode_0, x = input_117_cast_fp16)[name = string("input_cast_fp16")];
+            string var_3474_pad_type_0 = const()[name = string("op_3474_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_3474_strides_0 = const()[name = string("op_3474_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3474_pad_0 = const()[name = string("op_3474_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3474_dilations_0 = const()[name = string("op_3474_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_3474_groups_0 = const()[name = string("op_3474_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 3072, 1, 1]> layers_11_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense(indices = tensor<uint4, [768, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(142457984))), lut = tensor<fp16, [1, 1, 1, 1, 16, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(143637696))))[name = string("layers_11_fc2_inlier_module_weight_to_fp16_palettized")];
+            tensor<fp16, [768]> layers_11_fc2_inlier_module_bias_to_fp16 = const()[name = string("layers_11_fc2_inlier_module_bias_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(143637824)))];
+            tensor<fp16, [1, 768, 1, 1]> var_3474_cast_fp16 = conv(bias = layers_11_fc2_inlier_module_bias_to_fp16, dilations = var_3474_dilations_0, groups = var_3474_groups_0, pad = var_3474_pad_0, pad_type = var_3474_pad_type_0, strides = var_3474_strides_0, weight = layers_11_fc2_inlier_module_weight_to_fp16_palettized, x = input_cast_fp16)[name = string("op_3474_cast_fp16")];
+            string var_3480_pad_type_0 = const()[name = string("op_3480_pad_type_0"), val = string("valid")];
+            tensor<int32, [2]> var_3480_strides_0 = const()[name = string("op_3480_strides_0"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [4]> var_3480_pad_0 = const()[name = string("op_3480_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [2]> var_3480_dilations_0 = const()[name = string("op_3480_dilations_0"), val = tensor<int32, [2]>([1, 1])];
+            int32 var_3480_groups_0 = const()[name = string("op_3480_groups_0"), val = int32(1)];
+            tensor<fp16, [768, 3072, 1, 1]> layers_11_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense(mask = tensor<uint1, [768, 3072, 1, 1]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(143670336))), nonzero_data = tensor<fp16, [15410]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(143639424))))[name = string("layers_11_fc2_outlier_module_weight_to_fp16_sparsified")];
+            tensor<fp16, [1, 768, 1, 1]> var_3480_cast_fp16 = conv(dilations = var_3480_dilations_0, groups = var_3480_groups_0, pad = var_3480_pad_0, pad_type = var_3480_pad_type_0, strides = var_3480_strides_0, weight = layers_11_fc2_outlier_module_weight_to_fp16_sparsified, x = input_cast_fp16)[name = string("op_3480_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> hidden_states_25_cast_fp16 = add(x = var_3474_cast_fp16, y = var_3480_cast_fp16)[name = string("hidden_states_25_cast_fp16")];
+            tensor<fp16, [1, 768, 1, 1]> inputs_cast_fp16 = add(x = inputs_71_cast_fp16, y = hidden_states_25_cast_fp16)[name = string("inputs_cast_fp16")];
+            tensor<int32, [1]> out_axes_0 = const()[name = string("out_axes_0"), val = tensor<int32, [1]>([1])];
+            fp16 var_3499_to_fp16 = const()[name = string("op_3499_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> out_cast_fp16 = layer_norm(axes = out_axes_0, epsilon = var_3499_to_fp16, x = inputs_cast_fp16)[name = string("out_cast_fp16")];
+            tensor<fp16, [768]> hidden_states_gamma_0_to_fp16 = const()[name = string("hidden_states_gamma_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(143965312)))];
+            tensor<fp16, [768]> hidden_states_beta_0_to_fp16 = const()[name = string("hidden_states_beta_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(143966912)))];
+            fp16 hidden_states_epsilon_0_to_fp16 = const()[name = string("hidden_states_epsilon_0_to_fp16"), val = fp16(0x1.5p-17)];
+            tensor<fp16, [1, 768, 1, 1]> hidden_states_cast_fp16 = batch_norm(beta = hidden_states_beta_0_to_fp16, epsilon = hidden_states_epsilon_0_to_fp16, gamma = hidden_states_gamma_0_to_fp16, mean = obj_5_mean_0_to_fp16, variance = obj_5_variance_0_to_fp16, x = out_cast_fp16)[name = string("hidden_states_cast_fp16")];
+            tensor<int32, [1]> var_3510_axes_0 = const()[name = string("op_3510_axes_0"), val = tensor<int32, [1]>([2])];
+            tensor<fp16, [1, 768, 1]> var_3510_cast_fp16 = squeeze(axes = var_3510_axes_0, x = hidden_states_cast_fp16)[name = string("op_3510_cast_fp16")];
+            tensor<int32, [3]> var_3513_perm_0 = const()[name = string("op_3513_perm_0"), val = tensor<int32, [3]>([0, 2, 1])];
+            tensor<fp16, [51865]> linear_0_bias_0_to_fp16 = const()[name = string("linear_0_bias_0_to_fp16"), val = tensor<fp16, [51865]>(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(143968512)))];
+            tensor<fp16, [1, 1, 768]> var_3513_cast_fp16 = transpose(perm = var_3513_perm_0, x = var_3510_cast_fp16)[name = string("transpose_0")];
+            tensor<fp16, [1, 1, 51865]> logits = linear(bias = linear_0_bias_0_to_fp16, weight = embed_tokens_weight_to_fp16, x = var_3513_cast_fp16)[name = string("linear_0_cast_fp16")];
+            int32 var_3517 = const()[name = string("op_3517"), val = int32(1)];
+            bool obj_225_interleave_0 = const()[name = string("obj_225_interleave_0"), val = bool(false)];
+            tensor<fp16, [1, 9216, 1, 1]> key_cache_updates = concat(axis = var_3517, interleave = obj_225_interleave_0, values = (current_key_1_cast_fp16, current_key_3_cast_fp16, current_key_5_cast_fp16, current_key_7_cast_fp16, current_key_9_cast_fp16, current_key_11_cast_fp16, current_key_13_cast_fp16, current_key_15_cast_fp16, current_key_17_cast_fp16, current_key_19_cast_fp16, current_key_21_cast_fp16, current_key_cast_fp16))[name = string("obj_225_cast_fp16")];
+            int32 var_3520 = const()[name = string("op_3520"), val = int32(1)];
+            bool obj_227_interleave_0 = const()[name = string("obj_227_interleave_0"), val = bool(false)];
+            tensor<fp16, [1, 9216, 1, 1]> value_cache_updates = concat(axis = var_3520, interleave = obj_227_interleave_0, values = (current_value_1_cast_fp16, current_value_3_cast_fp16, current_value_5_cast_fp16, current_value_7_cast_fp16, current_value_9_cast_fp16, current_value_11_cast_fp16, current_value_13_cast_fp16, current_value_15_cast_fp16, current_value_17_cast_fp16, current_value_19_cast_fp16, current_value_21_cast_fp16, current_value_cast_fp16))[name = string("obj_227_cast_fp16")];
+            tensor<int32, [4]> var_3531_begin_0 = const()[name = string("op_3531_begin_0"), val = tensor<int32, [4]>([0, 3, 0, 0])];
+            tensor<int32, [4]> var_3531_end_0 = const()[name = string("op_3531_end_0"), val = tensor<int32, [4]>([1, 4, 1, 1536])];
+            tensor<bool, [4]> var_3531_end_mask_0 = const()[name = string("op_3531_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 1, 1, 1536]> var_3531_cast_fp16 = slice_by_index(begin = var_3531_begin_0, end = var_3531_end_0, end_mask = var_3531_end_mask_0, x = obj_113_cast_fp16)[name = string("op_3531_cast_fp16")];
+            tensor<int32, [4]> var_3534_begin_0 = const()[name = string("op_3534_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_3534_end_0 = const()[name = string("op_3534_end_0"), val = tensor<int32, [4]>([1, 1, 1, 1536])];
+            tensor<bool, [4]> var_3534_end_mask_0 = const()[name = string("op_3534_end_mask_0"), val = tensor<bool, [4]>([true, true, false, true])];
+            tensor<bool, [4]> var_3534_squeeze_mask_0 = const()[name = string("op_3534_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, true, false])];
+            tensor<fp16, [1, 1, 1536]> var_3534_cast_fp16 = slice_by_index(begin = var_3534_begin_0, end = var_3534_end_0, end_mask = var_3534_end_mask_0, squeeze_mask = var_3534_squeeze_mask_0, x = var_3531_cast_fp16)[name = string("op_3534_cast_fp16")];
+            tensor<int32, [4]> var_3549_begin_0 = const()[name = string("op_3549_begin_0"), val = tensor<int32, [4]>([0, 9, 0, 0])];
+            tensor<int32, [4]> var_3549_end_0 = const()[name = string("op_3549_end_0"), val = tensor<int32, [4]>([1, 10, 1, 1536])];
+            tensor<bool, [4]> var_3549_end_mask_0 = const()[name = string("op_3549_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 1, 1, 1536]> var_3549_cast_fp16 = slice_by_index(begin = var_3549_begin_0, end = var_3549_end_0, end_mask = var_3549_end_mask_0, x = obj_113_cast_fp16)[name = string("op_3549_cast_fp16")];
+            tensor<int32, [4]> var_3552_begin_0 = const()[name = string("op_3552_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_3552_end_0 = const()[name = string("op_3552_end_0"), val = tensor<int32, [4]>([1, 1, 1, 1536])];
+            tensor<bool, [4]> var_3552_end_mask_0 = const()[name = string("op_3552_end_mask_0"), val = tensor<bool, [4]>([true, true, false, true])];
+            tensor<bool, [4]> var_3552_squeeze_mask_0 = const()[name = string("op_3552_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, true, false])];
+            tensor<fp16, [1, 1, 1536]> var_3552_cast_fp16 = slice_by_index(begin = var_3552_begin_0, end = var_3552_end_0, end_mask = var_3552_end_mask_0, squeeze_mask = var_3552_squeeze_mask_0, x = var_3549_cast_fp16)[name = string("op_3552_cast_fp16")];
+            tensor<int32, [4]> var_3567_begin_0 = const()[name = string("op_3567_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_3567_end_0 = const()[name = string("op_3567_end_0"), val = tensor<int32, [4]>([1, 1, 1, 1536])];
+            tensor<bool, [4]> var_3567_end_mask_0 = const()[name = string("op_3567_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 1, 1, 1536]> var_3567_cast_fp16 = slice_by_index(begin = var_3567_begin_0, end = var_3567_end_0, end_mask = var_3567_end_mask_0, x = obj_167_cast_fp16)[name = string("op_3567_cast_fp16")];
+            tensor<int32, [4]> var_3570_begin_0 = const()[name = string("op_3570_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_3570_end_0 = const()[name = string("op_3570_end_0"), val = tensor<int32, [4]>([1, 1, 1, 1536])];
+            tensor<bool, [4]> var_3570_end_mask_0 = const()[name = string("op_3570_end_mask_0"), val = tensor<bool, [4]>([true, true, false, true])];
+            tensor<bool, [4]> var_3570_squeeze_mask_0 = const()[name = string("op_3570_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, true, false])];
+            tensor<fp16, [1, 1, 1536]> var_3570_cast_fp16 = slice_by_index(begin = var_3570_begin_0, end = var_3570_end_0, end_mask = var_3570_end_mask_0, squeeze_mask = var_3570_squeeze_mask_0, x = var_3567_cast_fp16)[name = string("op_3570_cast_fp16")];
+            tensor<int32, [4]> var_3585_begin_0 = const()[name = string("op_3585_begin_0"), val = tensor<int32, [4]>([0, 4, 0, 0])];
+            tensor<int32, [4]> var_3585_end_0 = const()[name = string("op_3585_end_0"), val = tensor<int32, [4]>([1, 5, 1, 1536])];
+            tensor<bool, [4]> var_3585_end_mask_0 = const()[name = string("op_3585_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 1, 1, 1536]> var_3585_cast_fp16 = slice_by_index(begin = var_3585_begin_0, end = var_3585_end_0, end_mask = var_3585_end_mask_0, x = obj_167_cast_fp16)[name = string("op_3585_cast_fp16")];
+            tensor<int32, [4]> var_3588_begin_0 = const()[name = string("op_3588_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_3588_end_0 = const()[name = string("op_3588_end_0"), val = tensor<int32, [4]>([1, 1, 1, 1536])];
+            tensor<bool, [4]> var_3588_end_mask_0 = const()[name = string("op_3588_end_mask_0"), val = tensor<bool, [4]>([true, true, false, true])];
+            tensor<bool, [4]> var_3588_squeeze_mask_0 = const()[name = string("op_3588_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, true, false])];
+            tensor<fp16, [1, 1, 1536]> var_3588_cast_fp16 = slice_by_index(begin = var_3588_begin_0, end = var_3588_end_0, end_mask = var_3588_end_mask_0, squeeze_mask = var_3588_squeeze_mask_0, x = var_3585_cast_fp16)[name = string("op_3588_cast_fp16")];
+            tensor<int32, [4]> var_3603_begin_0 = const()[name = string("op_3603_begin_0"), val = tensor<int32, [4]>([0, 7, 0, 0])];
+            tensor<int32, [4]> var_3603_end_0 = const()[name = string("op_3603_end_0"), val = tensor<int32, [4]>([1, 8, 1, 1536])];
+            tensor<bool, [4]> var_3603_end_mask_0 = const()[name = string("op_3603_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 1, 1, 1536]> var_3603_cast_fp16 = slice_by_index(begin = var_3603_begin_0, end = var_3603_end_0, end_mask = var_3603_end_mask_0, x = obj_167_cast_fp16)[name = string("op_3603_cast_fp16")];
+            tensor<int32, [4]> var_3606_begin_0 = const()[name = string("op_3606_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_3606_end_0 = const()[name = string("op_3606_end_0"), val = tensor<int32, [4]>([1, 1, 1, 1536])];
+            tensor<bool, [4]> var_3606_end_mask_0 = const()[name = string("op_3606_end_mask_0"), val = tensor<bool, [4]>([true, true, false, true])];
+            tensor<bool, [4]> var_3606_squeeze_mask_0 = const()[name = string("op_3606_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, true, false])];
+            tensor<fp16, [1, 1, 1536]> var_3606_cast_fp16 = slice_by_index(begin = var_3606_begin_0, end = var_3606_end_0, end_mask = var_3606_end_mask_0, squeeze_mask = var_3606_squeeze_mask_0, x = var_3603_cast_fp16)[name = string("op_3606_cast_fp16")];
+            tensor<int32, [4]> var_3621_begin_0 = const()[name = string("op_3621_begin_0"), val = tensor<int32, [4]>([0, 8, 0, 0])];
+            tensor<int32, [4]> var_3621_end_0 = const()[name = string("op_3621_end_0"), val = tensor<int32, [4]>([1, 9, 1, 1536])];
+            tensor<bool, [4]> var_3621_end_mask_0 = const()[name = string("op_3621_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 1, 1, 1536]> var_3621_cast_fp16 = slice_by_index(begin = var_3621_begin_0, end = var_3621_end_0, end_mask = var_3621_end_mask_0, x = obj_167_cast_fp16)[name = string("op_3621_cast_fp16")];
+            tensor<int32, [4]> var_3624_begin_0 = const()[name = string("op_3624_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_3624_end_0 = const()[name = string("op_3624_end_0"), val = tensor<int32, [4]>([1, 1, 1, 1536])];
+            tensor<bool, [4]> var_3624_end_mask_0 = const()[name = string("op_3624_end_mask_0"), val = tensor<bool, [4]>([true, true, false, true])];
+            tensor<bool, [4]> var_3624_squeeze_mask_0 = const()[name = string("op_3624_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, true, false])];
+            tensor<fp16, [1, 1, 1536]> var_3624_cast_fp16 = slice_by_index(begin = var_3624_begin_0, end = var_3624_end_0, end_mask = var_3624_end_mask_0, squeeze_mask = var_3624_squeeze_mask_0, x = var_3621_cast_fp16)[name = string("op_3624_cast_fp16")];
+            tensor<int32, [4]> var_3639_begin_0 = const()[name = string("op_3639_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_3639_end_0 = const()[name = string("op_3639_end_0"), val = tensor<int32, [4]>([1, 1, 1, 1536])];
+            tensor<bool, [4]> var_3639_end_mask_0 = const()[name = string("op_3639_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 1, 1, 1536]> var_3639_cast_fp16 = slice_by_index(begin = var_3639_begin_0, end = var_3639_end_0, end_mask = var_3639_end_mask_0, x = obj_185_cast_fp16)[name = string("op_3639_cast_fp16")];
+            tensor<int32, [4]> var_3642_begin_0 = const()[name = string("op_3642_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_3642_end_0 = const()[name = string("op_3642_end_0"), val = tensor<int32, [4]>([1, 1, 1, 1536])];
+            tensor<bool, [4]> var_3642_end_mask_0 = const()[name = string("op_3642_end_mask_0"), val = tensor<bool, [4]>([true, true, false, true])];
+            tensor<bool, [4]> var_3642_squeeze_mask_0 = const()[name = string("op_3642_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, true, false])];
+            tensor<fp16, [1, 1, 1536]> var_3642_cast_fp16 = slice_by_index(begin = var_3642_begin_0, end = var_3642_end_0, end_mask = var_3642_end_mask_0, squeeze_mask = var_3642_squeeze_mask_0, x = var_3639_cast_fp16)[name = string("op_3642_cast_fp16")];
+            tensor<int32, [4]> var_3657_begin_0 = const()[name = string("op_3657_begin_0"), val = tensor<int32, [4]>([0, 7, 0, 0])];
+            tensor<int32, [4]> var_3657_end_0 = const()[name = string("op_3657_end_0"), val = tensor<int32, [4]>([1, 8, 1, 1536])];
+            tensor<bool, [4]> var_3657_end_mask_0 = const()[name = string("op_3657_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 1, 1, 1536]> var_3657_cast_fp16 = slice_by_index(begin = var_3657_begin_0, end = var_3657_end_0, end_mask = var_3657_end_mask_0, x = obj_185_cast_fp16)[name = string("op_3657_cast_fp16")];
+            tensor<int32, [4]> var_3660_begin_0 = const()[name = string("op_3660_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_3660_end_0 = const()[name = string("op_3660_end_0"), val = tensor<int32, [4]>([1, 1, 1, 1536])];
+            tensor<bool, [4]> var_3660_end_mask_0 = const()[name = string("op_3660_end_mask_0"), val = tensor<bool, [4]>([true, true, false, true])];
+            tensor<bool, [4]> var_3660_squeeze_mask_0 = const()[name = string("op_3660_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, true, false])];
+            tensor<fp16, [1, 1, 1536]> var_3660_cast_fp16 = slice_by_index(begin = var_3660_begin_0, end = var_3660_end_0, end_mask = var_3660_end_mask_0, squeeze_mask = var_3660_squeeze_mask_0, x = var_3657_cast_fp16)[name = string("op_3660_cast_fp16")];
+            tensor<int32, [4]> var_3675_begin_0 = const()[name = string("op_3675_begin_0"), val = tensor<int32, [4]>([0, 9, 0, 0])];
+            tensor<int32, [4]> var_3675_end_0 = const()[name = string("op_3675_end_0"), val = tensor<int32, [4]>([1, 10, 1, 1536])];
+            tensor<bool, [4]> var_3675_end_mask_0 = const()[name = string("op_3675_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 1, 1, 1536]> var_3675_cast_fp16 = slice_by_index(begin = var_3675_begin_0, end = var_3675_end_0, end_mask = var_3675_end_mask_0, x = obj_185_cast_fp16)[name = string("op_3675_cast_fp16")];
+            tensor<int32, [4]> var_3678_begin_0 = const()[name = string("op_3678_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_3678_end_0 = const()[name = string("op_3678_end_0"), val = tensor<int32, [4]>([1, 1, 1, 1536])];
+            tensor<bool, [4]> var_3678_end_mask_0 = const()[name = string("op_3678_end_mask_0"), val = tensor<bool, [4]>([true, true, false, true])];
+            tensor<bool, [4]> var_3678_squeeze_mask_0 = const()[name = string("op_3678_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, true, false])];
+            tensor<fp16, [1, 1, 1536]> var_3678_cast_fp16 = slice_by_index(begin = var_3678_begin_0, end = var_3678_end_0, end_mask = var_3678_end_mask_0, squeeze_mask = var_3678_squeeze_mask_0, x = var_3675_cast_fp16)[name = string("op_3678_cast_fp16")];
+            tensor<int32, [4]> var_3693_begin_0 = const()[name = string("op_3693_begin_0"), val = tensor<int32, [4]>([0, 5, 0, 0])];
+            tensor<int32, [4]> var_3693_end_0 = const()[name = string("op_3693_end_0"), val = tensor<int32, [4]>([1, 6, 1, 1536])];
+            tensor<bool, [4]> var_3693_end_mask_0 = const()[name = string("op_3693_end_mask_0"), val = tensor<bool, [4]>([true, false, true, true])];
+            tensor<fp16, [1, 1, 1, 1536]> var_3693_cast_fp16 = slice_by_index(begin = var_3693_begin_0, end = var_3693_end_0, end_mask = var_3693_end_mask_0, x = obj_203_cast_fp16)[name = string("op_3693_cast_fp16")];
+            tensor<int32, [4]> var_3696_begin_0 = const()[name = string("op_3696_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_3696_end_0 = const()[name = string("op_3696_end_0"), val = tensor<int32, [4]>([1, 1, 1, 1536])];
+            tensor<bool, [4]> var_3696_end_mask_0 = const()[name = string("op_3696_end_mask_0"), val = tensor<bool, [4]>([true, true, false, true])];
+            tensor<bool, [4]> var_3696_squeeze_mask_0 = const()[name = string("op_3696_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, true, false])];
+            tensor<fp16, [1, 1, 1536]> var_3696_cast_fp16 = slice_by_index(begin = var_3696_begin_0, end = var_3696_end_0, end_mask = var_3696_end_mask_0, squeeze_mask = var_3696_squeeze_mask_0, x = var_3693_cast_fp16)[name = string("op_3696_cast_fp16")];
+            int32 var_3703 = const()[name = string("op_3703"), val = int32(1)];
+            bool var_3704_interleave_0 = const()[name = string("op_3704_interleave_0"), val = bool(false)];
+            tensor<fp16, [1, 10, 1536]> var_3704_cast_fp16 = concat(axis = var_3703, interleave = var_3704_interleave_0, values = (var_3534_cast_fp16, var_3552_cast_fp16, var_3570_cast_fp16, var_3588_cast_fp16, var_3606_cast_fp16, var_3624_cast_fp16, var_3642_cast_fp16, var_3660_cast_fp16, var_3678_cast_fp16, var_3696_cast_fp16))[name = string("op_3704_cast_fp16")];
+            bool var_3707 = const()[name = string("op_3707"), val = bool(false)];
+            tensor<int32, [1]> obj_axes_0 = const()[name = string("obj_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1536]> alignment_heads_weights = reduce_mean(axes = obj_axes_0, keep_dims = var_3707, x = var_3704_cast_fp16)[name = string("obj_cast_fp16")];
+        } -> (logits, key_cache_updates, value_cache_updates, alignment_heads_weights);
+}
\ No newline at end of file
diff --git a/openai_whisper-small_216MB/TextDecoder.mlmodelc/model.mlmodel b/openai_whisper-small_216MB/TextDecoder.mlmodelc/model.mlmodel
new file mode 100644
index 0000000000000000000000000000000000000000..afc6e44a3de029e754e5c282b300ff94cf93f9e7
--- /dev/null
+++ b/openai_whisper-small_216MB/TextDecoder.mlmodelc/model.mlmodel
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:66a95d0b170da859b01abe1b7dbe8bf75a4344775b1b7f95b3ebf702fd454bba
+size 458896
diff --git a/openai_whisper-small_216MB/TextDecoder.mlmodelc/weights/weight.bin b/openai_whisper-small_216MB/TextDecoder.mlmodelc/weights/weight.bin
new file mode 100644
index 0000000000000000000000000000000000000000..0c7f6f4dff40304a1a056fe542f97c51f7293243
--- /dev/null
+++ b/openai_whisper-small_216MB/TextDecoder.mlmodelc/weights/weight.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2c1bed2477b2cf29d09a0f7ba9904aead18177c8719ba5fb7137d7d0ff241b5e
+size 144072306